In [1]:
import pandas as pd, requests, json
import os

In [2]:
 # generate path to csv
# os.path.join with a single component returns it unchanged, so the path is
# just the file name, resolved against the current working directory.
csv_path = 'NIELSEN_VOD_VIEWING_HABITS-V4.CSV'
# NOTE: despite its name, json_converter holds the DataFrame parsed from the CSV.
json_converter = pd.read_csv(filepath_or_buffer=csv_path)

In [3]:
# json_converter is already a DataFrame (parsed from the CSV); expose it as
# `df`, report the row count, then list the available column names.
df = pd.DataFrame(data=json_converter)
print('We have {} rows'.format(df.shape[0]))
str(list(df.columns))

We have 9865 rows


"['row', 'age', 'username', 'gender', 'city', 'state', 'zip', 'lat', 'lng', 'freq', 'services', 'hulu', 'youtube', 'Apple TV+', 'FX', 'netflix', 'fuboTV', 'amazon', 'PlutoTV', 'Discovery+', 'Starz', 'Showtime', 'Philo', 'Crackle', 'CBS All Access', 'IMDb TV', 'Disney+', 'HBO MAX', 'NBC', 'Rent or Buy', 'FOX', 'Peacock', 'Tubi']"

In [4]:
# Add float64 copies of the raw 'lat'/'lng' columns under the names
# 'latitude'/'longitude'. No other column (including 'services') is touched.
df['latitude'] = json_converter['lat'].astype('float64')
df['longitude'] = json_converter['lng'].astype('float64')

In [5]:
# Narrow the frame to the fields used downstream: age, coordinates, services.
cols = ['age', 'latitude', 'longitude', 'services']
df_subset = df.loc[:, cols]

In [6]:
# Keep only rows where both coordinates are present. dropna() works row-wise
# and returns a new frame by default, so df_subset itself is left untouched.
df_geo = df_subset.dropna(subset=['latitude', 'longitude'])

print('We have {} geotagged rows'.format(df_geo.shape[0]))
df_geo.tail()

We have 9515 geotagged rows


Unnamed: 0,age,latitude,longitude,services
9860,76,46.0905,-100.6516,"{'Apple TV+': 1, 'hulu': 1}"
9861,18,32.8929,-80.0458,"{'youtube': 1, 'hulu': 1, 'netflix': 1}"
9862,28,42.6526,-73.7562,"{'Showtime': 1, 'netflix': 1, 'Discovery+': 1}"
9863,58,40.6612,-91.5745,{'youtube': 1}
9864,66,40.0826,-79.8501,"{'youtube': 1, 'netflix': 1}"


In [7]:
def df_to_geojson(df, properties, lat='latitude', lon='longitude'):
    """Convert the rows of a DataFrame into a GeoJSON FeatureCollection dict.

    Each row becomes one ``Feature`` with a ``Point`` geometry.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows. Must contain the ``lat`` and ``lon`` columns and every
        column named in ``properties``.
    properties : iterable of str
        Column names copied into each feature's ``properties`` mapping.
    lat, lon : str
        Names of the latitude and longitude columns.

    Returns
    -------
    dict
        ``{'type': 'FeatureCollection', 'features': [...]}`` with one feature
        per row, in row order. Coordinates are written as ``[lon, lat]``.

    Raises
    ------
    KeyError
        If ``lat``, ``lon`` or a name in ``properties`` is not a column of ``df``.

    Notes
    -----
    Rows are not filtered: a row with a missing coordinate still yields a
    feature, so drop such rows before calling if they are unwanted.
    """
    # Read each column once with Series.tolist(), which returns native Python
    # scalars. Iterating with DataFrame.iterrows() instead casts every row to
    # one common dtype: an int column next to float columns comes back as
    # floats, and an all-integer frame yields numpy.int64 values that
    # json.dumps cannot serialise.
    longitudes = df[lon].tolist()
    latitudes = df[lat].tolist()
    property_columns = [(name, df[name].tolist()) for name in properties]

    features = []
    for position, (x, y) in enumerate(zip(longitudes, latitudes)):
        features.append({
            'type': 'Feature',
            'properties': {name: values[position] for name, values in property_columns},
            # GeoJSON positions are ordered longitude first, then latitude.
            'geometry': {'type': 'Point', 'coordinates': [x, y]},
        })

    return {'type': 'FeatureCollection', 'features': features}

In [8]:
# Columns copied into every feature's properties mapping.
cols = ['age', 'latitude', 'longitude', 'services']
geojson = df_to_geojson(df=df_geo, properties=cols)

In [9]:
import IPython
# raw=True marks the argument as an already-built MIME bundle, so the dict is
# published under the 'application/geo+json' type rather than formatted as a
# Python object.
IPython.display.display(
    {'application/geo+json': geojson},
    raw=True,
)
# The trailing expression also echoes the plain dict as the cell result.
geojson

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'properties': {'age': 62,
    'latitude': 39.2564,
    'longitude': -119.9521,
    'services': "{'hulu': 1}"},
   'geometry': {'type': 'Point', 'coordinates': [-119.9521, 39.2564]}},
  {'type': 'Feature',
   'properties': {'age': 60,
    'latitude': 35.7252,
    'longitude': -92.5575,
    'services': "{'youtube': 1, 'Apple TV+': 1}"},
   'geometry': {'type': 'Point', 'coordinates': [-92.5575, 35.7252]}},
  {'type': 'Feature',
   'properties': {'age': 34,
    'latitude': 32.2143,
    'longitude': -82.4762,
    'services': "{'hulu': 1}"},
   'geometry': {'type': 'Point', 'coordinates': [-82.4762, 32.2143]}},
  {'type': 'Feature',
   'properties': {'age': 73,
    'latitude': 34.1387,
    'longitude': -87.3974,
    'services': "{'FX': 1}"},
   'geometry': {'type': 'Point', 'coordinates': [-87.3974, 34.1387]}},
  {'type': 'Feature',
   'properties': {'age': 46,
    'latitude': 38.9907,
    'longitude': -77.0261,
    'service

In [10]:
# Build the FeatureCollection from the geotagged rows, then serialise it to
# JSON text indented by two spaces.
useful_columns = ['age', 'latitude', 'longitude', 'services']
geojson_dict = df_to_geojson(df_geo, useful_columns)
geojson_str = json.dumps(obj=geojson_dict, indent=2)

In [11]:
# Write the JSON text wrapped in a JavaScript statement that assigns it to a
# variable named `dataset`.
output_filename = 'dataset.js'
with open(output_filename, mode='w') as output_file:
    output_file.write('var dataset = {};'.format(geojson_str))

In [12]:
# Report how many features the serialised collection contains.
feature_count = len(geojson_dict['features'])
print('{} geotagged features saved to file'.format(feature_count))

9515 geotagged features saved to file
