In [1]:
'''
Author: Jett Pettus  
Created: November 25, 2019   

Calculate the distance between cities and tv markets   
-- Create dummies if that city is covered by the tv market

Requires the third-party geopy package (e.g. `pip install geopy`)
'''
import numpy as np 
import pandas as pd
import geopy.distance 
import math 

# Directories:
# Each path ends with "/" so that file names can be appended with "+".
# raw is not referenced in the cells visible here.
raw = "../Data/Raw/"
# tmp is where mult_result writes its market_tv_match*.dta files.
tmp = "../Data/Temp/"
# proc is where the input CSVs are read from.
proc = "../Data/Processed/"

In [2]:
# TV data: later cells read its tvid, latitude, longitude, results_b and year columns.
tv_data = pd.read_csv(proc + 'tv_data_b_sink.csv')
# Newspaper data: later cells read its market_id, latitude and longitude columns.
papers_data = pd.read_csv(proc + 'newspaper_gis_merge.csv')

In [3]:
def comp_distance(lat1, lon1, lat2, lon2):
    '''
    Distance between two points, as computed by geopy.distance.distance.

    Input:
        lat1, lon1: latitude and longitude of the first point
        lat2, lon2: latitude and longitude of the second point
    Return:
        Distance in km
    '''
    start, end = (lat1, lon1), (lat2, lon2)
    return geopy.distance.distance(start, end).km

# Another Method (less accurate)
def distance(origin, destination):
    '''
    Great-circle distance between two points using the haversine formula
    on a sphere of radius 6371 km.

    Input:
        origin: (latitude, longitude) pair in degrees
        destination: (latitude, longitude) pair in degrees
    Return:
        Distance in km
    '''
    lat1, lon1 = origin
    lat2, lon2 = destination
    radius = 6371 # km

    dlat = math.radians(lat2-lat1)
    dlon = math.radians(lon2-lon1)
    sin_dlat = math.sin(dlat/2)
    sin_dlon = math.sin(dlon/2)
    a = sin_dlat * sin_dlat + math.cos(math.radians(lat1)) \
        * math.cos(math.radians(lat2)) * sin_dlon * sin_dlon
    # For (near-)antipodal points rounding can push `a` a hair above 1, which
    # would make sqrt(1-a) raise "math domain error". Cap it at 1. Only the
    # upper bound is capped so that NaN inputs still propagate as NaN.
    if a > 1.0:
        a = 1.0
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    d = radius * c

    return d

In [4]:
# Want to calculate the distance between every tv/newspaper pairing
# Then determine if that distance <= results_b (in km)

# tvid -> [year, results_b, latitude, longitude]
# (mult_result reads these entries by position, so the order matters.)
# NOTE(review): a tvid that appears on several rows keeps only its last row --
# confirm tv_data has one row per tvid.
tv_loc = {}
for tvid, year, dist, lat, lon in zip(tv_data['tvid'],
                                      tv_data['year'],
                                      tv_data['results_b'],
                                      tv_data['latitude'],
                                      tv_data['longitude']):
    tv_loc[tvid] = [year, dist, lat, lon]

# market_id -> [latitude, longitude]
paper_loc = {}
for market_id, lat, lon in zip(papers_data['market_id'],
                               papers_data['latitude'],
                               papers_data['longitude']):
    paper_loc[market_id] = [lat, lon]

In [5]:
def mult_result(mult):
    '''
    Match every newspaper market to the TV stations whose scaled radius
    reaches it, and save the matches as a Stata file.

    Input:
        mult: percentage adjustment applied to each station's results_b
              radius (e.g. 10 -> radius * 1.1, -20 -> radius * 0.8)
    Return:
        None. Prints the scaling factor and writes
        tmp + 'market_tv_match' + str(mult) + '.dta' with one row per
        (market_id, tvid) match.

    Reads the module-level paper_loc and tv_loc dictionaries and calls
    comp_distance for every market/station pair.
    '''
    pct = (100 + mult)/100
    print(pct)

    markets_long = []
    tv_long = []
    for market_id, (lat1, lon1) in paper_loc.items():
        # tv_loc entries are [year, results_b, latitude, longitude];
        # the year is not needed for the match.
        for tvid, (_, radius, lat2, lon2) in tv_loc.items():
            resb_dist = radius * pct
            # Named `km` rather than `distance` so the module-level
            # haversine `distance` function is not shadowed.
            km = comp_distance(lat1, lon1, lat2, lon2)
            if km <= resb_dist:
                markets_long.append(market_id)
                tv_long.append(tvid)

    df = pd.DataFrame()
    df['market_id'] = markets_long
    df['tvid'] = tv_long

    df.to_stata(tmp + 'market_tv_match' + str(mult) + '.dta', write_index = False)

In [6]:
# Sweep the radius adjustment in 10-point steps: 0% up to +100% first,
# then -10% down to -100%.
for x in [*range(0, 110, 10), *(-pct_cut for pct_cut in range(10, 110, 10))]:
    mult_result(x)

1.0
1.1
1.2
1.3
1.4
1.5
1.6
1.7
1.8
1.9
2.0
0.9
0.8
0.7
0.6
0.5
0.4
0.3
0.2
0.1
0.0
