In [1]:
import pandas as pd
import numpy as np
import os
# Let notebook outputs render up to 1000 rows before pandas truncates them.
pd.set_option('display.max_rows', 1000)

In [2]:
# List the raw snapshot CSVs in a deterministic order.
# os.listdir() returns entries in arbitrary order and returns every file in the
# folder (this notebook itself writes barcelona_final.csv there at the end), while
# the loading cell selects files by position. Filtering on the snapshot naming
# pattern and sorting keeps the positions stable across machines and re-runs; the
# zero-padded survey id in the names makes the sorted order chronological.
brc_files = sorted(
    file_name
    for file_name in os.listdir('../datasets/barcelona')
    if file_name.startswith('tomslee_airbnb_barcelona_') and file_name.endswith('.csv')
)
brc_files

['tomslee_airbnb_barcelona_0020_2014-05-20.csv',
 'tomslee_airbnb_barcelona_0051_2014-09-04.csv',
 'tomslee_airbnb_barcelona_0070_2015-01-14.csv',
 'tomslee_airbnb_barcelona_0114_2015-04-29.csv',
 'tomslee_airbnb_barcelona_0199_2015-11-06.csv',
 'tomslee_airbnb_barcelona_0442_2016-05-30.csv',
 'tomslee_airbnb_barcelona_0501_2016-07-25.csv',
 'tomslee_airbnb_barcelona_0546_2016-08-28.csv',
 'tomslee_airbnb_barcelona_0620_2016-10-22.csv',
 'tomslee_airbnb_barcelona_0690_2016-12-10.csv',
 'tomslee_airbnb_barcelona_0808_2017-01-18.csv',
 'tomslee_airbnb_barcelona_0891_2017-02-18.csv',
 'tomslee_airbnb_barcelona_0999_2017-03-27.csv',
 'tomslee_airbnb_barcelona_1076_2017-04-15.csv',
 'tomslee_airbnb_barcelona_1145_2017-04-24.csv',
 'tomslee_airbnb_barcelona_1225_2017-05-12.csv',
 'tomslee_airbnb_barcelona_1349_2017-06-15.csv',
 'tomslee_airbnb_barcelona_1477_2017-07-23.csv']

In [3]:
# Load the snapshots in two batches: the first 13 files and everything after them.
# NOTE(review): the split is presumably needed because the later files use a different
# column layout (the next cell re-selects barcelona2 with barcelona.columns) -- confirm.
barcelona = pd.concat(
    [pd.read_csv('../datasets/barcelona/' + file_name) for file_name in brc_files[:13]],
    axis=0,
    ignore_index=True,
)
barcelona2 = pd.concat(
    [pd.read_csv('../datasets/barcelona/' + file_name) for file_name in brc_files[13:]],
    axis=0,
    ignore_index=True,
)

In [4]:
# Bring the second batch into the column layout of the first one, stack both batches,
# then order the rows by room and by snapshot timestamp with a fresh 0..n-1 index.
barcelona2_aligned = barcelona2[barcelona.columns]
barcelona = (
    pd.concat([barcelona, barcelona2_aligned], ignore_index=True)
    .sort_values(by=['room_id', 'last_modified'])
    .reset_index(drop=True)
)
barcelona.head(100)

Unnamed: 0,room_id,host_id,room_type,borough,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,minstay,latitude,longitude,last_modified
0,2918,3257,Private room,,Ciutat Vella,1,5.0,1.0,1.0,38.0,2.0,41.378576,2.188436,2014-05-21 03:12:56.726
1,7670,20617,Entire home/apt,,Ciutat Vella,119,4.5,2.0,,55.0,2.0,41.386942,2.183842,2015-11-13 18:58:48.009865
2,7670,20617,Entire home/apt,,Ciutat Vella,143,4.5,2.0,0.0,55.0,,41.386942,2.183842,2016-12-11 08:33:37.512214
3,7670,20617,Entire home/apt,,Ciutat Vella,143,4.5,2.0,0.0,56.0,,41.386942,2.183842,2017-05-13 02:52:42.019533
4,7670,20617,Entire home/apt,,Ciutat Vella,143,4.5,2.0,0.0,58.0,,41.386942,2.183842,2017-06-16 08:42:10.162091
5,10132,34580,Private room,,Ciutat Vella,124,5.0,2.0,1.0,66.0,2.0,41.38588,2.185098,2014-05-21 04:45:10.698
6,10938,39550,Entire home/apt,,Nou Barris,17,5.0,4.0,2.0,108.0,3.0,41.432447,2.17098,2014-05-21 18:41:27.195
7,10938,39550,Entire home/apt,,Nou Barris,19,5.0,4.0,2.0,102.0,3.0,41.432447,2.17098,2014-09-20 09:56:22.954
8,10938,39550,Entire home/apt,,Nou Barris,19,5.0,4.0,2.0,102.0,3.0,41.432447,2.17098,2015-01-14 21:46:00.806
9,10938,39550,Entire home/apt,,Nou Barris,19,5.0,4.0,2.0,105.0,3.0,41.432447,2.17098,2015-05-02 21:19:47.655


In [5]:
# Share of rows whose minstay is NaN: about 58% (see the output below). The same room
# shows up in several rows, though, and some of those rows carry the minstay value while
# others do not, so the next step groups by room_id and averages minstay per room.
barcelona['minstay'].isnull().sum() / len(barcelona)

0.5810972077799722

In [6]:
avg_minstay = barcelona.groupby('room_id')['minstay'].mean()
# count() only counts non-NaN entries, so dividing it by the number of rooms gives the
# share of unique rooms that have a minstay value in at least one of their rows.
rooms_with_minstay = avg_minstay.count()
print(rooms_with_minstay / len(avg_minstay))
# About 74% of the unique rooms have at least one minstay value, so the remaining rooms
# need another criterion to decide which of their rows (if any) should be deleted.

0.7392381896518855


In [7]:
# Gather every row of the rooms whose minstay is NaN in all of their rows, to see which
# other values those rooms are missing.
# One isin() mask replaces a full scan of the frame per room and also works when no such
# room exists (pd.concat raises on an empty list). barcelona is already sorted by
# room_id, so the rows come out in the same order as concatenating room by room.
room_ids_no_minstay = avg_minstay.index[avg_minstay.isnull()]
rooms_no_minstay = barcelona[barcelona['room_id'].isin(room_ids_no_minstay)]
rooms_no_minstay

Unnamed: 0,room_id,host_id,room_type,borough,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,minstay,latitude,longitude,last_modified
474,34565,149191,Private room,,Gràcia,3,5.0,1.0,1.0,33.0,,41.410480,2.162545,2016-12-10 03:49:25.341636
475,34565,149191,Private room,,Gràcia,5,5.0,1.0,1.0,33.0,,41.410480,2.162545,2017-01-18 12:04:40.487307
476,34565,149191,Private room,,Gràcia,9,5.0,1.0,1.0,33.0,,41.410480,2.162545,2017-02-18 04:26:25.119864
477,34565,149191,Private room,,Gràcia,14,5.0,1.0,1.0,33.0,,41.410480,2.162545,2017-03-27 18:28:45.072389
478,34565,149191,Private room,,Gràcia,15,5.0,1.0,1.0,33.0,,41.410480,2.162545,2017-04-15 08:38:58.615315
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
286577,20089199,759057,Entire home/apt,,Gràcia,0,0.0,2.0,1.0,344.0,,41.402014,2.168469,2017-07-28 12:51:27.760925
286578,20090140,1633304,Entire home/apt,,Ciutat Vella,0,0.0,4.0,3.0,181.0,,41.375107,2.174296,2017-07-28 11:53:02.094899
286579,20091366,124283414,Entire home/apt,,Gràcia,0,0.0,4.0,2.0,102.0,,41.403435,2.166943,2017-07-28 13:02:32.683058
286580,20093382,142887934,Entire home/apt,,Ciutat Vella,0,0.0,4.0,1.0,144.0,,41.378919,2.165175,2017-07-28 11:43:28.848047


In [8]:
# Most expensive rows first; rows without a price end up at the bottom because
# sort_values places NaN last by default.
rooms_no_minstay = rooms_no_minstay.sort_values(by='price', ascending=False)
rooms_no_minstay

Unnamed: 0,room_id,host_id,room_type,borough,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,minstay,latitude,longitude,last_modified
263367,16738878,45764838,Entire home/apt,,Sarrià-Sant Gervasi,0,0.0,12.0,5.0,16417.0,,41.409863,2.142526,2017-06-16 16:45:56.042314
263361,16738878,45764838,Entire home/apt,,Sarrià-Sant Gervasi,0,0.0,12.0,5.0,10300.0,,41.406391,2.140634,2017-01-19 03:16:19.235430
263364,16738878,45764838,Entire home/apt,,Sarrià-Sant Gervasi,0,0.0,12.0,5.0,10300.0,,41.409863,2.142526,2017-04-15 22:23:14.250953
263363,16738878,45764838,Entire home/apt,,Sarrià-Sant Gervasi,0,0.0,12.0,5.0,10300.0,,41.409863,2.142526,2017-03-28 09:35:38.649312
263362,16738878,45764838,Entire home/apt,,Sarrià-Sant Gervasi,0,0.0,12.0,5.0,10300.0,,41.409863,2.142526,2017-02-18 19:37:39.950275
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274576,17806078,31590542,Private room,,Gràcia,0,0.0,1.0,1.0,9.0,,41.402914,2.162944,2017-05-12 17:50:43.219581
274577,17806078,31590542,Private room,,Gràcia,0,0.0,1.0,1.0,9.0,,41.402914,2.162944,2017-06-15 20:33:56.122214
179648,10146190,51058942,,,Eixample,1,,2.0,,,,41.395900,2.165507,2016-07-25 15:52:36.179404
203309,12376153,54310958,,,Gràcia,0,,3.0,,,,41.400101,2.157231,2016-08-28 13:34:30.695300


In [9]:
# Best-rated rows first; rows without an overall_satisfaction end up at the bottom
# because sort_values places NaN last by default.
rooms_no_minstay = rooms_no_minstay.sort_values(by='overall_satisfaction', ascending=False)
rooms_no_minstay

Unnamed: 0,room_id,host_id,room_type,borough,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,minstay,latitude,longitude,last_modified
278117,18194567,17567595,Entire home/apt,,Ciutat Vella,5,5.0,2.0,1.0,87.0,,41.387038,2.179292,2017-05-13 03:49:54.962600
258902,16292600,15944590,Entire home/apt,,Eixample,4,5.0,5.0,2.0,215.0,,41.386069,2.162638,2017-04-25 02:16:12.288606
251313,15599286,100535573,Entire home/apt,,Sant Martí,8,5.0,4.0,1.0,82.0,,41.413256,2.190612,2017-04-24 19:27:40.557289
150777,7610490,39328627,Entire home/apt,,Eixample,8,5.0,6.0,2.0,214.0,,41.387470,2.165617,2016-08-28 07:07:23.320560
261645,16543650,108875874,Private room,,Ciutat Vella,3,5.0,3.0,1.0,82.0,,41.386240,2.175915,2017-07-23 21:31:40.843155
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
274575,17806078,31590542,Private room,,Gràcia,0,,1.0,1.0,10.0,,41.402914,2.162944,2017-04-25 06:43:47.864852
277163,18087350,25356683,Private room,,Eixample,0,,2.0,1.0,10.0,,41.399364,2.178922,2017-04-25 06:42:48.797866
179648,10146190,51058942,,,Eixample,1,,2.0,,,,41.395900,2.165507,2016-07-25 15:52:36.179404
203309,12376153,54310958,,,Gràcia,0,,3.0,,,,41.400101,2.157231,2016-08-28 13:34:30.695300


In [10]:
# Row labels removed by hand. 179648 and 203309 are the rows at the bottom of the two
# sorted views above that have neither a room_type nor a price.
# NOTE(review): 218989 does not appear in the truncated outputs -- confirm why it is dropped.
# These labels are only valid for the exact row order produced by the load/sort cells.
indexes_to_drop = [179648, 203309, 218989]
barcelona = barcelona.drop(index=indexes_to_drop)

In [11]:
# Remove the columns that are not carried into the cleaned data set.
barcelona = barcelona.drop(columns=['borough', 'latitude', 'longitude', 'minstay'])

In [12]:
# Look at the 100 cheapest rows to inspect the low end of the price column.
barcelona.sort_values(by='price', ascending=True).head(100)

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified
55557,1823947,9547606,Shared room,Sants-Montjuïc,41,4.0,16.0,1.0,9.0,2016-10-22 21:01:16.094674
274576,17806078,31590542,Private room,Gràcia,0,0.0,1.0,1.0,9.0,2017-05-12 17:50:43.219581
274577,17806078,31590542,Private room,Gràcia,0,0.0,1.0,1.0,9.0,2017-06-15 20:33:56.122214
233912,14136311,85306726,Private room,Eixample,2,0.0,3.0,1.0,10.0,2017-06-15 22:49:30.209987
230374,13978164,15920367,Private room,Eixample,13,4.5,2.0,1.0,10.0,2017-01-18 13:25:04.794527
230375,13978164,15920367,Private room,Eixample,13,4.5,2.0,1.0,10.0,2017-02-18 05:10:06.879080
90449,3390888,17107294,Private room,Ciutat Vella,275,5.0,2.0,1.0,10.0,2017-06-15 20:35:17.759024
90456,3390901,17107294,Private room,Ciutat Vella,170,5.0,2.0,1.0,10.0,2016-07-26 02:07:09.532721
55558,1823947,9547606,Shared room,Sants-Montjuïc,44,4.0,16.0,1.0,10.0,2016-12-13 07:46:47.845085
90457,3390901,17107294,Private room,Ciutat Vella,183,5.0,2.0,1.0,10.0,2016-08-28 15:59:56.306458


In [13]:
# Rows that have no room_type at all.
room_type_missing = barcelona['room_type'].isnull()
no_room_type = barcelona.loc[room_type_missing]
no_room_type

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified
12183,534076,1860990,,Gràcia,0,,,,258.0,2015-11-06 03:10:56.049032
37470,1188965,6500377,,Gràcia,7,4.5,6.0,,,2016-08-28 03:11:38.212657
38069,1197458,6541786,,Sant Martí,163,4.5,3.0,,,2016-07-25 20:53:00.078305
71435,2565960,10034965,,Eixample,80,5.0,2.0,,,2016-07-25 11:59:19.796379
100415,3873476,9877338,,Sarrià-Sant Gervasi,0,,2.0,,,2016-07-25 20:16:07.596825
114564,4865651,13053014,,Eixample,11,4.5,,,34.0,2015-11-06 20:16:27.569130
120751,5380230,27889800,,Eixample,30,4.0,,,34.0,2015-11-06 13:55:30.359681
124664,5744924,29798056,,Ciutat Vella,29,4.5,,,55.0,2015-11-13 15:01:52.640411
124700,5748010,3366752,,Eixample,5,5.0,,,39.0,2015-11-13 20:39:33.391295
132813,6457149,33719062,,Ciutat Vella,10,5.0,,,45.0,2015-11-06 02:39:27.644819


In [14]:
# Every row without a room_type is also missing several other fields, so these rows
# are removed rather than repaired.
barcelona = barcelona.drop(index=no_room_type.index)

In [15]:
# Next come the NaN values in the 'bedrooms' column. For rooms that only have a single
# row, the NaN will be replaced by the average number of bedrooms of listings with the
# same room type and the same number of accommodates, computed here.
bedrooms_by_type_and_size = barcelona.groupby(by=['room_type', 'accommodates'])['bedrooms']
bedrooms_avg = bedrooms_by_type_and_size.mean()
bedrooms_avg

room_type        accommodates
Entire home/apt  1.0             0.844221
                 2.0             0.857923
                 3.0             1.285185
                 4.0             1.582261
                 5.0             2.350058
                 6.0             2.524186
                 7.0             3.210481
                 8.0             3.333074
                 9.0             3.669049
                 10.0            4.005138
                 11.0            4.360743
                 12.0            4.466165
                 13.0            4.740000
                 14.0            4.320809
                 15.0            4.280374
                 16.0            6.626667
Private room     1.0             1.004551
                 2.0             1.005996
                 3.0             1.034008
                 4.0             1.096493
                 5.0             1.133401
                 6.0             1.314286
                 7.0             1.382075
    

In [16]:
# Pick out the rooms that appear in exactly one row and show those whose 'bedrooms' is NaN.
rooms_row_count = barcelona['room_id'].value_counts()
single_room_ids = rooms_row_count.index[rooms_row_count == 1]
single_row_rooms = barcelona[barcelona['room_id'].isin(single_room_ids)]
single_row_rooms[single_row_rooms['bedrooms'].isnull()]

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified
10969,501668,809388,Entire home/apt,Gràcia,7,5.0,2.0,,114.0,2014-05-21 05:18:03.624
29491,974834,4206971,Entire home/apt,Sarrià-Sant Gervasi,17,4.5,2.0,,85.0,2014-05-21 19:10:45.759
32978,1061567,686062,Entire home/apt,Ciutat Vella,5,4.5,2.0,,115.0,2014-05-21 06:09:31.288
45308,1369904,7127163,Entire home/apt,Ciutat Vella,0,,4.0,,230.0,2014-05-21 18:01:59.136
46020,1396490,1906609,Entire home/apt,Ciutat Vella,0,,3.0,,77.0,2014-05-21 00:18:30.037
52473,1669972,7261724,Entire home/apt,Ciutat Vella,3,5.0,16.0,,614.0,2014-05-21 02:39:19.670
61284,2101018,879799,Entire home/apt,Ciutat Vella,4,5.0,8.0,,107.0,2014-05-21 00:48:28.120
61816,2130772,3728940,Entire home/apt,Sant Martí,0,,6.0,,153.0,2015-01-14 17:00:40.356
78213,2877444,12534046,Entire home/apt,Eixample,0,,4.0,,176.0,2014-05-21 00:08:37.872
97428,3737351,19104573,Entire home/apt,Sants-Montjuïc,0,,4.0,,65.0,2015-01-14 13:38:21.828


In [17]:
# Some single-row rooms are missing other values besides 'bedrooms'. The few that lack
# both 'bedrooms' and 'overall_satisfaction' are deleted.
bedrooms_missing = single_row_rooms['bedrooms'].isnull()
satisfaction_missing = single_row_rooms['overall_satisfaction'].isnull()
to_delete = single_row_rooms[bedrooms_missing & satisfaction_missing]
barcelona = barcelona.drop(index=to_delete.index)

In [18]:
# The remaining single-row rooms (missing 'bedrooms' but with an overall_satisfaction)
# get the rounded average number of bedrooms for their room type and accommodates.
needs_bedrooms = (
    single_row_rooms['bedrooms'].isnull()
    & single_row_rooms['overall_satisfaction'].notnull()
)
to_change = single_row_rooms[needs_bedrooms]
# Resolve all (room_type, accommodates) pairs with one join instead of one .loc read
# and one .loc write per row. A pair that bedrooms_avg does not contain yields NaN, so
# that row is left unfilled instead of stopping the fill halfway with a KeyError.
imputed_bedrooms = (
    to_change[['room_type', 'accommodates']]
    .join(bedrooms_avg.rename('bedrooms_avg'), on=['room_type', 'accommodates'])['bedrooms_avg']
    .round()
)
# The assignment aligns on the row labels, which the join preserves.
barcelona.loc[to_change.index, 'bedrooms'] = imputed_bedrooms
barcelona.loc[to_change.index]

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified
10969,501668,809388,Entire home/apt,Gràcia,7,5.0,2.0,1.0,114.0,2014-05-21 05:18:03.624
29491,974834,4206971,Entire home/apt,Sarrià-Sant Gervasi,17,4.5,2.0,1.0,85.0,2014-05-21 19:10:45.759
32978,1061567,686062,Entire home/apt,Ciutat Vella,5,4.5,2.0,1.0,115.0,2014-05-21 06:09:31.288
52473,1669972,7261724,Entire home/apt,Ciutat Vella,3,5.0,16.0,7.0,614.0,2014-05-21 02:39:19.670
61284,2101018,879799,Entire home/apt,Ciutat Vella,4,5.0,8.0,3.0,107.0,2014-05-21 00:48:28.120
121881,5473355,28353643,Entire home/apt,Gràcia,6,4.0,2.0,1.0,51.0,2015-11-06 02:46:18.696278
157612,8047510,42355219,Entire home/apt,Eixample,8,4.5,2.0,1.0,45.0,2015-11-06 20:50:27.507921


In [19]:
# Fill missing 'bedrooms' from the other rows of the same room: carry the last known
# value forward, then back-fill the rows that precede the room's first known value.
# Rows are ordered by room_id and last_modified, so "forward" means "from an earlier
# snapshot of the same room".
# Grouping by room_id means a value is never borrowed from a different room, and no
# lookup relies on the labels index-1 / index+1, which may belong to another room or may
# already have been dropped. Rooms without any known value stay NaN.
no_bedrm_multi = barcelona[barcelona['bedrooms'].isnull()]  # rows that needed a fill, kept for inspection
room_ids = barcelona['room_id']
filled_bedrooms = barcelona['bedrooms'].groupby(room_ids).ffill()
filled_bedrooms = filled_bedrooms.groupby(room_ids).bfill()
# fillna only touches the rows that were NaN; existing values are never overwritten.
barcelona['bedrooms'] = barcelona['bedrooms'].fillna(filled_bedrooms)

In [20]:
# Rows that still have no 'bedrooms' value.
barcelona.loc[barcelona['bedrooms'].isna()]

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified
7689,366514,1850507,Entire home/apt,Ciutat Vella,5,5.0,2.0,,100.0,2014-05-21 05:44:46.371
7690,366514,1850507,Entire home/apt,Ciutat Vella,10,5.0,2.0,,102.0,2015-01-14 21:09:17.142
45551,1383917,3729717,Entire home/apt,Ciutat Vella,9,4.5,2.0,,100.0,2014-05-21 18:58:11.463
45552,1383917,3729717,Entire home/apt,Ciutat Vella,14,4.5,2.0,,1961.0,2015-05-03 01:24:57.658
74843,2733885,7296976,Entire home/apt,Gràcia,1,4.5,4.0,,156.0,2014-05-21 01:57:07.023
74844,2733885,7296976,Entire home/apt,Gràcia,6,5.0,4.0,,148.0,2015-01-14 16:00:38.065
74845,2733885,7296976,Entire home/apt,Gràcia,9,5.0,4.0,,112.0,2015-05-02 11:03:30.727
74846,2733885,7296976,Entire home/apt,Gràcia,13,5.0,4.0,,90.0,2015-11-06 17:54:02.020414
74847,2733885,7296976,Entire home/apt,Gràcia,20,5.0,4.0,,92.0,2016-05-31 05:17:52.041966
87073,3249944,14738687,Entire home/apt,Ciutat Vella,0,,3.0,,116.0,2015-01-14 14:23:35.790


In [21]:
# Remove two rows by label.
# NOTE(review): neither label is visible in the truncated output above -- confirm which
# rows these are and why they are removed. Hard-coded labels are only valid for the
# exact row order produced by the load/sort cells.
barcelona = barcelona.drop(index=[112737, 161532])

In [22]:
# Whatever is still missing 'bedrooms' at this point gets the rounded average number of
# bedrooms for its room type and accommodates.
to_change = barcelona[barcelona['bedrooms'].isnull()]
# Resolve all (room_type, accommodates) pairs with one join instead of one .loc read
# and one .loc write per row. A pair that bedrooms_avg does not contain yields NaN, so
# that row is left unfilled instead of stopping the fill halfway with a KeyError.
imputed_bedrooms = (
    to_change[['room_type', 'accommodates']]
    .join(bedrooms_avg.rename('bedrooms_avg'), on=['room_type', 'accommodates'])['bedrooms_avg']
    .round()
)
# The assignment aligns on the row labels, which the join preserves.
barcelona.loc[to_change.index, 'bedrooms'] = imputed_bedrooms
barcelona.loc[to_change.index]

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified
7689,366514,1850507,Entire home/apt,Ciutat Vella,5,5.0,2.0,1.0,100.0,2014-05-21 05:44:46.371
7690,366514,1850507,Entire home/apt,Ciutat Vella,10,5.0,2.0,1.0,102.0,2015-01-14 21:09:17.142
45551,1383917,3729717,Entire home/apt,Ciutat Vella,9,4.5,2.0,1.0,100.0,2014-05-21 18:58:11.463
45552,1383917,3729717,Entire home/apt,Ciutat Vella,14,4.5,2.0,1.0,1961.0,2015-05-03 01:24:57.658
74843,2733885,7296976,Entire home/apt,Gràcia,1,4.5,4.0,2.0,156.0,2014-05-21 01:57:07.023
74844,2733885,7296976,Entire home/apt,Gràcia,6,5.0,4.0,2.0,148.0,2015-01-14 16:00:38.065
74845,2733885,7296976,Entire home/apt,Gràcia,9,5.0,4.0,2.0,112.0,2015-05-02 11:03:30.727
74846,2733885,7296976,Entire home/apt,Gràcia,13,5.0,4.0,2.0,90.0,2015-11-06 17:54:02.020414
74847,2733885,7296976,Entire home/apt,Gràcia,20,5.0,4.0,2.0,92.0,2016-05-31 05:17:52.041966
87073,3249944,14738687,Entire home/apt,Ciutat Vella,0,,3.0,1.0,116.0,2015-01-14 14:23:35.790


In [23]:
# Check: no row should be missing 'bedrooms' any more.
barcelona.loc[barcelona['bedrooms'].isna()]

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified


In [24]:
# Check: rows with a missing 'reviews' value.
barcelona.loc[barcelona['reviews'].isna()]

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified


In [25]:
# Rows with a missing 'accommodates' value.
barcelona.loc[barcelona['accommodates'].isna()]

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified
45,15090,59369,Entire home/apt,Ciutat Vella,101,4.0,,1.0,105.0,2015-05-03 01:47:35.364
46,15090,59369,Entire home/apt,Ciutat Vella,114,4.0,,1.0,84.0,2015-11-06 20:16:37.462965
65,18653,71615,Entire home/apt,Eixample,0,,,2.0,124.0,2015-11-14 07:39:14.714112
83,18666,71615,Entire home/apt,Sant Martí,0,,,2.0,130.0,2015-05-03 01:10:55.548
84,18666,71615,Entire home/apt,Sant Martí,1,4.0,,2.0,104.0,2015-11-06 17:48:09.717031
...,...,...,...,...,...,...,...,...,...,...
169923,9276554,6134910,Entire home/apt,Sant Martí,0,,,2.0,78.0,2015-11-06 18:11:31.630435
169952,9277818,22693122,Entire home/apt,Eixample,0,,,3.0,202.0,2015-11-06 10:38:24.939361
169962,9280624,20163925,Entire home/apt,Horta-Guinardó,0,,,1.0,39.0,2015-11-14 11:58:34.478485
170031,9291385,14132973,Entire home/apt,Eixample,0,,,3.0,168.0,2015-11-06 15:16:19.828977


In [26]:
# Average number of accommodates for each bedroom count, used to fill the gaps above.
accommodates_by_bedrooms = barcelona.groupby(by='bedrooms')['accommodates']
accommodates_avg = accommodates_by_bedrooms.mean()

In [27]:
# Fill missing 'accommodates' with the rounded average for the row's bedroom count.
to_change = barcelona[barcelona['accommodates'].isnull()]
# Series.map looks every bedroom count up in accommodates_avg in one pass; a count that
# accommodates_avg does not contain maps to NaN (row left unfilled) instead of raising a
# KeyError in the middle of a row-by-row loop.
imputed_accommodates = to_change['bedrooms'].map(accommodates_avg).round()
barcelona.loc[to_change.index, 'accommodates'] = imputed_accommodates
barcelona.loc[to_change.index]

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified
45,15090,59369,Entire home/apt,Ciutat Vella,101,4.0,2.0,1.0,105.0,2015-05-03 01:47:35.364
46,15090,59369,Entire home/apt,Ciutat Vella,114,4.0,2.0,1.0,84.0,2015-11-06 20:16:37.462965
65,18653,71615,Entire home/apt,Eixample,0,,5.0,2.0,124.0,2015-11-14 07:39:14.714112
83,18666,71615,Entire home/apt,Sant Martí,0,,5.0,2.0,130.0,2015-05-03 01:10:55.548
84,18666,71615,Entire home/apt,Sant Martí,1,4.0,5.0,2.0,104.0,2015-11-06 17:48:09.717031
...,...,...,...,...,...,...,...,...,...,...
169923,9276554,6134910,Entire home/apt,Sant Martí,0,,5.0,2.0,78.0,2015-11-06 18:11:31.630435
169952,9277818,22693122,Entire home/apt,Eixample,0,,6.0,3.0,202.0,2015-11-06 10:38:24.939361
169962,9280624,20163925,Entire home/apt,Horta-Guinardó,0,,2.0,1.0,39.0,2015-11-14 11:58:34.478485
170031,9291385,14132973,Entire home/apt,Eixample,0,,6.0,3.0,168.0,2015-11-06 15:16:19.828977


In [28]:
# Check: no row should be missing 'accommodates' any more.
barcelona.loc[barcelona['accommodates'].isna()]

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified


In [29]:
# Which overall_satisfaction values occur on rows that have zero reviews?
barcelona.loc[barcelona['reviews'] == 0, 'overall_satisfaction'].value_counts()

0.0    26600
Name: overall_satisfaction, dtype: int64

In [30]:
# Rows without an overall_satisfaction that also have zero reviews get a satisfaction
# of 0, matching the value the other zero-review rows already carry.
to_change = barcelona[barcelona['overall_satisfaction'].isnull()]
# One label-based assignment replaces a scalar .loc read and write per row.
unreviewed_labels = to_change.index[to_change['reviews'] == 0]
barcelona.loc[unreviewed_labels, 'overall_satisfaction'] = 0
barcelona.loc[to_change.index]

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified
65,18653,71615,Entire home/apt,Eixample,0,0.0,5.0,2.0,124.0,2015-11-14 07:39:14.714112
66,18653,71615,Entire home/apt,Eixample,0,0.0,6.0,2.0,129.0,2016-05-31 15:53:33.796121
67,18653,71615,Entire home/apt,Eixample,0,0.0,6.0,2.0,127.0,2016-07-25 07:54:18.712816
68,18653,71615,Entire home/apt,Eixample,0,0.0,6.0,2.0,131.0,2016-08-28 03:46:47.107804
69,18653,71615,Entire home/apt,Eixample,0,0.0,6.0,2.0,126.0,2016-10-22 21:33:48.878696
...,...,...,...,...,...,...,...,...,...,...
279702,18400850,15850102,Private room,Gràcia,0,0.0,2.0,1.0,52.0,2017-04-25 13:51:28.623451
279709,18402161,34834548,Private room,Sant Martí,0,0.0,2.0,1.0,46.0,2017-04-25 13:52:14.293228
279710,18402398,127494838,Private room,Eixample,0,0.0,2.0,1.0,56.0,2017-04-25 13:51:07.225716
279720,18402947,94546994,Private room,Ciutat Vella,0,0.0,4.0,2.0,50.0,2017-04-25 13:57:07.323583


In [31]:
# Review counts of the rows that still have no overall_satisfaction.
barcelona.loc[barcelona['overall_satisfaction'].isnull(), 'reviews'].value_counts()

1    8932
2    5779
3      62
4       6
5       5
6       4
7       1
Name: reviews, dtype: int64

In [32]:
# Remove the rows that still have no overall_satisfaction.
satisfaction_missing_labels = barcelona.index[barcelona['overall_satisfaction'].isnull()]
barcelona = barcelona.drop(index=satisfaction_missing_labels)

In [36]:
# Display the cleaned frame (pandas truncates the rendering to the configured row limit).
barcelona

Unnamed: 0,room_id,host_id,room_type,neighborhood,reviews,overall_satisfaction,accommodates,bedrooms,price,last_modified
0,2918,3257,Private room,Ciutat Vella,1,5.0,1.0,1.0,38.0,2014-05-21 03:12:56.726
1,7670,20617,Entire home/apt,Ciutat Vella,119,4.5,2.0,0.0,55.0,2015-11-13 18:58:48.009865
2,7670,20617,Entire home/apt,Ciutat Vella,143,4.5,2.0,0.0,55.0,2016-12-11 08:33:37.512214
3,7670,20617,Entire home/apt,Ciutat Vella,143,4.5,2.0,0.0,56.0,2017-05-13 02:52:42.019533
4,7670,20617,Entire home/apt,Ciutat Vella,143,4.5,2.0,0.0,58.0,2017-06-16 08:42:10.162091
...,...,...,...,...,...,...,...,...,...,...
286577,20089199,759057,Entire home/apt,Gràcia,0,0.0,2.0,1.0,344.0,2017-07-28 12:51:27.760925
286578,20090140,1633304,Entire home/apt,Ciutat Vella,0,0.0,4.0,3.0,181.0,2017-07-28 11:53:02.094899
286579,20091366,124283414,Entire home/apt,Gràcia,0,0.0,4.0,2.0,102.0,2017-07-28 13:02:32.683058
286580,20093382,142887934,Entire home/apt,Ciutat Vella,0,0.0,4.0,1.0,144.0,2017-07-28 11:43:28.848047


In [35]:
# Write the cleaned frame to CSV without the index column.
# The file goes into the same folder the raw snapshot files are listed from, so a plain
# directory listing of that folder will include barcelona_final.csv once this has run.
barcelona.to_csv('../datasets/barcelona/barcelona_final.csv', index=False)