In [146]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', None)

In [3]:
df = pd.read_csv("SpeedDatingData.csv", encoding="ISO-8859-1")

In [166]:
def get_imputed_table(df, seed=0):

    rng = np.random.default_rng(seed=seed)

    df['scored_decision'] = df['like'] + df['dec']*0.5
    df['stdzed_scored_decision'] = df.groupby('iid')['scored_decision'].transform(lambda x: ((x - x.mean()) / x.std()))

    large_pivot = df[['iid', 'pid', 'stdzed_scored_decision']].pivot(index='iid', columns='pid', values='stdzed_scored_decision').iloc[:, 1:]
    randomly_generated = pd.DataFrame(rng.normal(0,1, size=large_pivot.shape), large_pivot.index, large_pivot.columns)
    imputed_score = large_pivot.fillna(randomly_generated)

    return imputed_score

In [168]:
imputed_table = get_imputed_table(df)

In [169]:
preference_matrix = pd.DataFrame(np.tril(imputed_table.to_numpy()) + np.tril(imputed_table.to_numpy().T))
np.fill_diagonal(preference_matrix.values, -0.5)

In [96]:
imputed_table.to_csv("imputed_table.csv")

In [170]:
preference_matrix.to_csv("full_preference_matrix.csv")

In [131]:
people = pd.read_csv("people.csv", index_col="iid")

In [147]:
z = pd.read_csv("z.csv")
z.index = range(194, 234)
z.columns = range(194, 234)

In [148]:
pairs = []
for i in range(194, 234):
    for j in range(194, 234):
        if z.loc[i, j] == 1:
            pairs.append((i, j))

In [144]:
pairs

[(196, 217),
 (197, 231),
 (198, 228),
 (199, 221),
 (200, 215),
 (201, 216),
 (202, 233),
 (203, 226),
 (204, 225),
 (205, 222),
 (206, 220),
 (207, 219),
 (208, 227),
 (209, 224),
 (210, 229),
 (211, 230),
 (213, 214),
 (214, 213),
 (215, 200),
 (216, 201),
 (217, 196),
 (219, 207),
 (220, 206),
 (221, 199),
 (222, 205),
 (224, 209),
 (225, 204),
 (226, 203),
 (227, 208),
 (228, 198),
 (229, 210),
 (230, 211),
 (231, 197),
 (233, 202)]

In [153]:
np.sum(z).sum()

38.0

In [154]:
np.sum(z * imputed_table.loc[194:233, 194:233]).sum() -1

32.24181870518917

In [198]:
group9_people = people.loc[194:233].copy()

In [174]:
group9_preferences = preference_matrix.loc[194:233, 194:233]

In [175]:
group9_preferences

Unnamed: 0,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233
194,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
195,-1.490595,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
196,0.566616,1.985818,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
197,0.306019,2.100746,-0.071694,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
198,-0.345061,-0.283935,1.710939,0.144869,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
199,1.329693,1.646054,0.297307,-1.028878,2.193442,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
200,0.223827,-2.084176,2.400282,-0.589882,-1.706404,-0.688506,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
201,-0.398558,-1.277669,0.306488,1.135413,0.461466,1.667479,-0.70665,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
202,0.820282,1.199046,-0.180892,0.767943,-2.253167,-0.591606,-0.948417,0.359105,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
203,0.032719,2.344176,0.27754,-1.047309,0.244327,-1.573143,0.873444,-2.058645,-1.61097,-0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [199]:
group9_people['religion_text'] = group9_people.iloc[:, 2:].idxmax(axis=1)

In [200]:
group9_people

Unnamed: 0_level_0,gender,imprelig,agnostic,atheist,buddhist,catholic,hindu,jewish,mormon,muslim,protestant,unaffiliated,religion_text
iid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
194,0,1,1,0,0,0,0,0,0,0,0,0,agnostic
195,0,1,0,0,0,0,0,0,1,0,0,0,mormon
196,0,1,0,0,0,0,0,0,0,0,1,0,protestant
197,0,0,0,0,0,0,0,0,0,0,1,0,protestant
198,0,0,0,0,0,0,0,0,0,0,0,1,unaffiliated
199,0,0,1,0,0,0,0,0,0,0,0,0,agnostic
200,0,0,0,1,0,0,0,0,0,0,0,0,atheist
201,0,1,0,0,0,0,0,0,0,0,0,1,unaffiliated
202,0,0,0,0,0,0,0,0,1,0,0,0,mormon
203,0,1,0,0,0,0,0,0,0,0,1,0,protestant


In [206]:
def get_heuristic_pairs(ld_preference_matrix, people_df, rho=-0.5, seed=0):
    used = set()
    total_value = {}
    for i in ld_preference_matrix:
        if i not in used:
            ordered_column = ld_preference_matrix.loc[:, i].sort_values(ascending=False)
            rho_requirement = ordered_column >= rho
            used_requirement = ~(ordered_column.index.isin(used))
            gender_requirement = people_df.loc[ordered_column.index]['gender'] != people_df.loc[i]['gender']
            religion_requirement = people_df.loc[ordered_column.index]['religion_text'] == people_df.loc[i]['religion_text']
            avail = ordered_column[rho_requirement & used_requirement & gender_requirement & religion_requirement]
            used.add(i)
            if len(avail) > 0:      
                used.add(avail.index[0])
                total_value[(i, avail.index[0])] = avail.iloc[0]
            else:
                total_value[(i, None)] = rho

    return used, total_value

In [207]:
used,total_value = get_heuristic_pairs(group9_preferences, group9_people, rho=-0.5, seed=0)

In [211]:
sum(total_value.values())

14.1572340251115

In [25]:
wave9 = df[df['wave']==9][['iid', 'gender', 'pid', 'stdzed_scored_decision']]

In [28]:
wave9[wave9['gender']==1]

Unnamed: 0,iid,gender,pid,stdzed_scored_decision
3008,214,1,194.0,-1.769323
3009,214,1,195.0,-0.494135
3010,214,1,196.0,-0.812932
3011,214,1,197.0,0.462255
3012,214,1,198.0,-2.406916
...,...,...,...,...
3403,233,1,209.0,-0.520988
3404,233,1,210.0,2.083952
3405,233,1,211.0,-1.041976
3406,233,1,212.0,0.000000


In [7]:
preferences_0 = wave9[wave9['gender']==0].pivot(index='iid', columns='pid', values='stdzed_scored_decision').fillna(0)
preferences_1 = wave9[wave9['gender']==1].pivot(index='iid', columns='pid', values='stdzed_scored_decision').fillna(0)
#preferences_0.to_csv('preferences_0.csv')
#preferences_1.to_csv('preferences_1.csv')

In [149]:
preferences_0

pid,214.0,215.0,216.0,217.0,218.0,219.0,220.0,221.0,222.0,223.0,224.0,225.0,226.0,227.0,228.0,229.0,230.0,231.0,232.0,233.0
iid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
194,0.700783,1.60502,-1.559808,-0.203453,-0.203453,-0.203453,0.248665,1.60502,-1.559808,-0.655572,0.700783,0.248665,0.700783,0.700783,-0.655572,0.700783,-1.559808,0.700783,-1.559808,0.248665
195,1.596872,1.909984,-0.908025,-0.594913,-0.281801,0.970648,0.970648,-0.281801,-1.53425,0.344423,0.344423,0.657536,0.031311,-0.281801,-0.908025,0.031311,-1.53425,0.970648,-1.53425,0.031311
196,1.238009,1.238009,1.463102,2.138379,-0.787824,1.688194,-0.787824,-0.337639,-0.787824,-0.337639,-1.238009,-0.787824,-0.337639,-0.337639,-1.238009,-0.337639,-0.337639,0.112546,-0.337639,0.112546
197,-0.579409,-0.579409,-0.579409,1.146489,-0.579409,-0.579409,-0.579409,-0.579409,-0.579409,-0.086295,-0.086295,-0.579409,0.406819,-0.086295,-0.086295,-0.579409,-0.579409,2.62583,2.62583,-0.086295
198,-0.295561,1.182244,0.443342,-0.295561,-0.295561,-0.295561,-1.773366,-1.034464,-1.773366,-0.295561,-0.295561,0.443342,1.182244,1.182244,1.182244,1.182244,0.443342,0.443342,-1.773366,0.443342
199,0.385185,0.977777,-1.195061,0.187654,0.187654,1.372838,0.780246,0.977777,-1.195061,1.175308,-0.009877,0.977777,0.977777,0.385185,-0.009877,-1.195061,-1.590122,-1.195061,-1.590122,-0.404938
200,0.860309,0.860309,-1.75503,1.514144,-0.447361,0.860309,0.860309,-0.447361,-0.447361,0.860309,1.514144,0.860309,-0.447361,-0.447361,-0.447361,-1.101196,-1.75503,-0.447361,-0.447361,0.0
201,0.707511,0.707511,0.17354,0.440526,-2.496313,-0.894401,-0.894401,0.17354,-1.962342,0.707511,-0.36043,0.17354,1.508467,0.17354,-0.36043,0.707511,1.508467,0.17354,-0.36043,0.17354
202,1.086515,1.086515,-0.585047,1.504405,-0.585047,0.250734,-1.420827,-0.585047,-1.420827,0.250734,-0.585047,0.250734,1.504405,0.250734,-1.420827,1.504405,-1.420827,0.250734,0.250734,-0.167156
203,-1.29268,0.460106,-2.169073,1.774696,0.460106,0.460106,0.460106,-0.416287,-0.416287,0.02191,-0.854483,0.898303,1.336499,0.02191,0.460106,0.898303,-1.29268,-0.854483,-0.854483,0.898303


In [8]:
preference_table = preferences_0 + preferences_1.T

In [77]:
sum(total_value.values())

37.882432253466256

In [78]:
total_value

{(214.0, 210): 2.5997143699910414,
 (215.0, 195): 3.371111556466804,
 (216.0, 204): 1.9693407147629431,
 (217.0, 212): 2.795351099952753,
 (218.0, 207): 1.5985102915396,
 (219.0, 196): 2.416330437474175,
 (220.0, 206): 3.320029642079919,
 (221.0, 199): 1.9248394909857827,
 (222.0, 205): 1.2572050787734272,
 (223.0, 200): 1.4821477408251018,
 (224.0, 208): 1.545301953169824,
 (225.0, 203): 1.561518753225648,
 (226.0, 211): 2.1942004201709273,
 (227.0, 213): 2.1229796933179386,
 (228.0, 209): 1.1747524262592166,
 (229.0, 202): 1.0722500762121299,
 (230.0, 201): 1.9609008834774704,
 (231.0, 197): 4.0935941535953475,
 (232.0, 0): -0.5,
 (233.0, 198): -0.07764652881379225}

In [65]:
preference_table

pid,214.0,215.0,216.0,217.0,218.0,219.0,220.0,221.0,222.0,223.0,224.0,225.0,226.0,227.0,228.0,229.0,230.0,231.0,232.0,233.0
iid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
194,-1.068539,-0.153286,-3.900802,-0.624137,-1.337434,-1.555706,-2.456722,1.386467,-2.70114,-0.724665,1.16235,-1.063656,-0.818385,0.141493,-1.310275,0.876847,-3.659564,-0.58351,-2.571012,-0.793311
195,1.102737,3.371112,-0.884379,-0.005956,0.852179,1.698784,1.327963,0.082454,-2.675581,-0.415602,0.80599,0.756312,-0.208558,0.214174,-0.44038,1.727921,-1.081816,2.438411,-1.058389,-1.010665
196,0.425077,-0.024999,0.067964,2.39079,-2.569793,2.41633,-0.430509,-1.138999,-0.714973,-2.824994,-2.095204,-1.535706,-0.577508,-1.319036,-0.770364,-1.378013,0.578829,-0.560179,-1.943668,-0.92943
197,-0.117153,0.386421,-1.974546,1.735446,-2.361378,-0.683428,-0.222093,-0.215154,0.464789,-0.84632,1.364342,0.083807,1.766075,0.831786,-0.740999,-1.619782,-0.591009,4.093594,4.291342,-1.128271
198,-2.702477,-0.080764,0.466988,-2.062432,-1.429541,-2.479969,-2.436952,-3.292843,-2.914698,-2.091984,-1.152756,-0.868979,-0.336925,-0.221259,0.901657,0.14187,0.895775,-1.45252,-3.974221,-0.077647
199,0.209847,0.953012,-1.171414,1.449705,0.349651,1.268819,-1.92514,1.924839,-0.150864,1.797146,-0.537381,0.229895,1.697383,2.147479,-1.412813,-1.627216,-1.137689,-0.338865,-1.709087,-0.404938
200,1.322564,1.826139,-1.731384,2.103101,0.68662,1.588445,1.217624,0.499702,0.111163,1.482148,1.64602,2.087964,-0.047579,0.892827,-2.598531,-1.101196,-2.462683,1.020403,1.218151,0.0
201,0.532173,0.930395,1.143043,0.019842,-2.334316,-0.582343,-0.128726,0.537795,-3.589347,0.638418,0.101136,-0.574341,1.268599,0.036356,-0.266901,0.883574,1.960901,-0.499185,-0.776808,0.17354
202,2.186364,0.318804,0.384456,1.420269,-0.099055,-0.269363,-1.471872,0.362016,-1.347976,-0.509291,0.20621,-1.061587,0.624887,-0.730663,-2.823764,1.07225,-3.056549,-0.421991,-0.463057,0.87482
203,-0.19283,-0.307605,-1.19957,2.363653,0.946098,1.188242,0.817422,-0.052032,0.142237,1.33468,-0.392917,1.561519,1.736281,-0.537381,1.675985,0.466148,-0.840246,0.001712,-0.973448,2.982255


In [46]:
matched_partners = wave9[wave9['gender']==0].merge(wave9[wave9['gender']==1], left_on=['pid', 'iid'], right_on=['iid', 'pid'], suffixes=('_0', '_1'))

In [53]:
matched_partners.to_csv('matched_partners_wave_9.csv')

In [114]:
df['date_only'] = df['timestamp'].str.split(' ').str[0]

In [118]:
oneday = df[df['date_only']=='2019-11-14']

In [123]:
#bag of words 
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer()
total_cars_matrix = cv.fit_transform(oneday['cars_list'])

In [127]:
#turn into dataframe

total_cars_df = pd.DataFrame(total_cars_matrix.toarray(), columns=cv.get_feature_names_out())

In [145]:
combodf = pd.concat([oneday.reset_index(drop=True), total_cars_df], axis=1)

In [151]:
combodf

Unnamed: 0,latitude,longitude,total_cars,cars_list,timestamp,date_only,10,100,101,102,...,90,91,92,93,94,95,96,97,98,99
0,32.091010,34.786330,1,[197],2019-11-14 00:03:03 UTC,2019-11-14,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,32.124610,34.830230,1,[116],2019-11-14 00:03:03 UTC,2019-11-14,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,32.097650,34.800800,1,[103],2019-11-14 00:03:03 UTC,2019-11-14,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,32.078621,34.797969,1,[203],2019-11-14 00:03:03 UTC,2019-11-14,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,32.076318,34.767295,1,[248],2019-11-14 00:03:03 UTC,2019-11-14,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160353,32.079825,34.774450,2,"[148, 67]",2019-11-14 21:33:02 UTC,2019-11-14,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
160354,32.143248,34.792340,3,"[94, 139, 272]",2019-11-14 21:33:02 UTC,2019-11-14,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
160355,32.064615,34.795787,3,"[259, 72, 153]",2019-11-14 21:33:02 UTC,2019-11-14,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
160356,32.108610,34.797300,1,[55],2019-11-14 21:33:02 UTC,2019-11-14,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [166]:
groupedup = oneday.sort_values('timestamp').groupby(['latitude', 'longitude', 'timestamp'])[['total_cars']].mean()

In [174]:
groupedup['rolling'] = groupedup['total_cars'].rolling(2).mean()

In [190]:
grouped = oneday.sort_values('timestamp').groupby(['latitude', 'longitude']).rolling(2, on='timestamp')[['total_cars']].mean()


In [202]:
grouped['cars'] = oneday.sort_values('timestamp').groupby(['latitude', 'longitude', 'timestamp'])[['total_cars']].mean()

In [203]:
grouped[(grouped['total_cars'] != grouped['cars']) & (grouped['total_cars'].notnull())]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total_cars,cars
latitude,longitude,timestamp,Unnamed: 3_level_1,Unnamed: 4_level_1
32.034470,34.74685,2019-11-14 14:39:03 UTC,1.5,1.0
32.035393,34.75873,2019-11-14 04:09:02 UTC,0.5,0.0
32.035393,34.75873,2019-11-14 12:30:03 UTC,0.5,1.0
32.035393,34.75873,2019-11-14 12:36:02 UTC,0.5,0.0
32.036900,34.76250,2019-11-14 05:03:02 UTC,0.5,1.0
...,...,...,...,...
32.143248,34.79234,2019-11-14 11:06:02 UTC,3.5,3.0
32.143248,34.79234,2019-11-14 13:03:02 UTC,3.5,4.0
32.143248,34.79234,2019-11-14 16:54:09 UTC,3.5,3.0
32.143248,34.79234,2019-11-14 18:18:03 UTC,3.5,4.0


In [225]:
grouped.loc[(grouped['total_cars'] - grouped['cars']) > 0, 'Demand'] = 1

grouped.loc[(grouped['total_cars'] - grouped['cars']) < 0, 'Supply'] = 1

In [227]:
grouped.Demand.sum()

997.0

In [228]:
grouped.Supply.sum()

985.0

In [220]:
oneday[oneday['latitude']==32.072323].sort_values('timestamp')[250:300]

Unnamed: 0,latitude,longitude,total_cars,cars_list,timestamp,date_only
7119225,32.072323,34.790555,2,"[18, 54]",2019-11-14 12:30:03 UTC,2019-11-14
7082557,32.072323,34.790555,2,"[18, 54]",2019-11-14 12:33:03 UTC,2019-11-14
7085497,32.072323,34.790555,2,"[18, 54]",2019-11-14 12:36:02 UTC,2019-11-14
7082993,32.072323,34.790555,2,"[18, 54]",2019-11-14 12:39:02 UTC,2019-11-14
7092512,32.072323,34.790555,2,"[18, 54]",2019-11-14 12:42:02 UTC,2019-11-14
7115525,32.072323,34.790555,2,"[18, 54]",2019-11-14 12:45:02 UTC,2019-11-14
7026960,32.072323,34.790555,2,"[18, 54]",2019-11-14 12:48:03 UTC,2019-11-14
7118317,32.072323,34.790555,2,"[18, 54]",2019-11-14 12:51:02 UTC,2019-11-14
7124652,32.072323,34.790555,2,"[18, 54]",2019-11-14 12:54:03 UTC,2019-11-14
7122201,32.072323,34.790555,2,"[18, 54]",2019-11-14 12:57:03 UTC,2019-11-14


In [99]:
# turn into a dataframe
total_cars_matrix[:,4].sum()

0

In [86]:
total_cars_df.sum()

0       0
1       0
2       0
3       0
4       0
       ..
256     7
257    26
258    29
259    29
260    27
Length: 261, dtype: int64

In [35]:
# make cars_list a list

df['cars_list'].str.sl

0           [[138]]
1            [[64]]
2           [[120]]
3           [[110]]
4           [[190]]
             ...   
20049193       [[]]
20049194       [[]]
20049195       [[]]
20049196       [[]]
20049197       [[]]
Name: cars_list, Length: 20049198, dtype: object

In [33]:
df[df['cars_list'].apply(lambda x: 71 in x)]

TypeError: 'in <string>' requires string as left operand, not int