## Read in data

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the labeled dataset from the CSV one directory above the notebook.
df = pd.read_csv('../df_labeled.csv')

In [4]:
# Report how many rows lack a location next to the overall row count.
missing_location_total = df['location'].isnull().sum()
print(f"Total null location values: {missing_location_total}")
print(f"Total values: {df.shape[0]}")

Total null location values: 20855
Total values: 41902


In [5]:
# Keep only the rows that have a location value.
df_clean = df.dropna(subset=['location'])

In [34]:
# Summarise columns, non-null counts and dtypes of the filtered frame.
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 21047 entries, 3 to 41899
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Unnamed: 0       21047 non-null  int64  
 1   text             21047 non-null  object 
 2   userid           21047 non-null  float64
 3   location         21047 non-null  object 
 4   coordinates      15 non-null     object 
 5   translated_text  21047 non-null  object 
 6   label            21047 non-null  float64
dtypes: float64(2), int64(1), object(4)
memory usage: 1.3+ MB


In [35]:
# Collect the distinct location strings; these are the values that get
# mapped to latitude/longitude further down.
unique_locations = df_clean['location'].unique()

In [78]:
import requests
from dotenv import load_dotenv
load_dotenv()
import os
from urllib.parse import urlencode
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# Locations for which extract_lat_lng returned no coordinates; the function
# appends to this list as a side effect.
non_valid_locations = []

# Shared session whose https:// requests go through an adapter configured
# with up to 3 retries and a backoff factor of 0.5.
retry_strat = Retry(total=3, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry_strat)
http = requests.Session()
http.mount("https://", adapter=adapter)

def extract_lat_lng(location, data_type= 'json'):
    """Geocode ``location`` with the Google Geocoding API.

    Parameters
    ----------
    location : str
        Free-text place name, sent as the ``address`` query parameter.
    data_type : str, default 'json'
        Output-format segment of the endpoint URL. The response is parsed
        as JSON, so any other format is treated as a failed lookup.

    Returns
    -------
    dict
        The ``geometry.location`` mapping of the first result (keys
        ``'lat'`` and ``'lng'``), or ``{}`` when the lookup failed.

    Raises
    ------
    KeyError
        If the ``API_KEY`` environment variable is not set.

    Notes
    -----
    Every failed lookup appends ``location`` to the module-level
    ``non_valid_locations`` list. Failures other than a plain
    ``ZERO_RESULTS`` answer (HTTP errors, quota or authorisation statuses,
    unparsable bodies) additionally emit a ``RuntimeWarning`` so they are
    not mistaken for genuinely unknown places.
    """
    import warnings  # local import so the function does not depend on another cell

    base_url = f"https://maps.googleapis.com/maps/api/geocode/{data_type}"
    params = {
        "address": location,
        "key": os.environ['API_KEY']
    }
    url_params = urlencode(params)
    url = f"{base_url}?{url_params}"
    # A timeout keeps one stalled request from hanging the whole geocoding run.
    r = http.get(url, timeout=10)

    # Decode the body once; a body that is not a JSON object counts as a failure.
    try:
        payload = r.json()
    except ValueError:
        payload = {}
    if not isinstance(payload, dict):
        payload = {}

    status = payload.get('status')
    results = payload.get('results') or []
    http_ok = 200 <= r.status_code < 300

    # Only an "OK" status with at least one result carries coordinates; other
    # statuses (quota exceeded, request denied, ...) come back with no results.
    if not http_ok or status != "OK" or not results:
        non_valid_locations.append(location)
        if status != "ZERO_RESULTS":
            warnings.warn(
                f"Geocoding {location!r} failed: HTTP {r.status_code}, status={status!r}",
                RuntimeWarning,
            )
        return {}

    return results[0]['geometry']['location']

In [79]:
# Smoke-test the geocoder call on a single place name.
r = extract_lat_lng('ukrain')
# Read the coordinates by key; unpacking r.values() depends on the order in
# which the keys happen to appear in the response.
lat, lon = r['lat'], r['lng']
print(lat)
print(lon)

48.379433
31.16558


In [71]:
# Same lookup as a quick check, this time with a capitalised spelling.
string = 'Ukrain'
r = extract_lat_lng(string)
# Read the coordinates by key; unpacking r.values() depends on the order in
# which the keys happen to appear in the response.
lat, lon = r['lat'], r['lng']
lat

48.379433

In [80]:
# iterate through locations and get lat/lon
from tqdm import tqdm

# Start from an empty failure log so re-running this cell does not append
# every unresolved location a second time.
non_valid_locations.clear()

# location string -> (lat, lon) for every location the geocoder resolved.
location_map = {}
for location in tqdm(unique_locations):
    res = extract_lat_lng(location=location)
    if not res:
        # extract_lat_lng has already recorded the failure.
        continue
    # Read by key instead of unpacking res.values(), which depends on key order.
    location_map[location] = (res['lat'], res['lng'])


100%|██████████| 5643/5643 [11:15<00:00,  8.36it/s]


In [87]:
# Reshape location_map into column-oriented lists: one list each for the
# location name, its latitude and its longitude, all in the same order.
dict_pre = {
    'name': list(location_map),
    'lat': [coords[0] for coords in location_map.values()],
    'lon': [coords[1] for coords in location_map.values()],
}

In [93]:
# Resolved locations: one row per name with its lat/lon columns.
locations_with_lat_and_long = pd.DataFrame(dict_pre)
# Locations the geocoder returned nothing for, as a single 'name' column.
weird_locations = pd.DataFrame(non_valid_locations, columns=['name'])

In [97]:
# Persist both tables as gzip-compressed CSVs one directory above the notebook.
# NOTE(review): index=False is not passed, so the row index is written as an
# extra unnamed first column — confirm that is intended.
locations_with_lat_and_long.to_csv("../locations_lat_lon.csv.gz", compression='gzip')
weird_locations.to_csv('../weird_locations.csv.gz', compression='gzip')

## Create national heat map

In [3]:
import pandas as pd
# Load the merged coordinates table; note that this rebinds `df`, which held
# the labeled dataset in the previous section.
df = pd.read_csv('../merged_coords.csv.gz')

In [7]:
# Drop the leftover index column. Reassigning with errors='ignore' keeps the
# cell re-runnable: an in-place drop raises KeyError the second time it runs.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [8]:
# Preview the first two rows.
df.head(2)

Unnamed: 0,location,label,lat,lon
0,Ukraine,1.0,48.379433,31.16558
1,Ukraine,1.0,48.379433,31.16558


In [25]:
from scipy.interpolate import interp1d
import plotly.express as px

# Base-map styles to render; one density map is drawn per entry.
type_list = ['open-street-map']

# The loop variable is `map_style` rather than `type`: a variable named `type`
# would overwrite the builtin for every later cell in the kernel.
for map_style in type_list:
    fig = px.density_mapbox(
        df, lat='lat', lon='lon', mapbox_style=map_style, radius=2,
        hover_name='location', zoom=0, range_color=[0, 1],
    )
    # Show inside the loop so each style is displayed, not only the last one,
    # and so an empty list does not reach an undefined `fig`.
    fig.show()

In [16]:
# Split the rows by label: 0 goes to the pro-Russia frame, 1 to the
# pro-Ukraine frame.
label_values = df['label']
df_russia = df.loc[label_values.eq(0)]
df_ukrain = df.loc[label_values.eq(1)]

In [18]:
from dotenv import load_dotenv
load_dotenv()
import os
import gmaps
from ipywidgets.embed import embed_minimal_html

def create_gmap(df_russia, df_ukrain, name='export'):
    """Export a Google Maps figure with one symbol layer per input frame.

    Parameters
    ----------
    df_russia : DataFrame
        Rows drawn as translucent red symbols; needs 'lat' and 'lon' columns.
    df_ukrain : DataFrame
        Rows drawn as translucent green symbols; needs 'lat' and 'lon' columns.
    name : str, default 'export'
        Base name of the output file, written to ``../{name}.html``.

    Raises
    ------
    KeyError
        If the ``GMAPS_API_KEY`` environment variable is not set.
    """
    gmaps.configure(api_key=os.environ['GMAPS_API_KEY'])

    # (frame, rgba colour) pairs, in the order the layers are added to the figure.
    layer_specs = (
        (df_ukrain, 'rgba(0, 150, 0, 0.4)'),
        (df_russia, 'rgba(200, 0, 0, 0.4)'),
    )
    layers = [
        gmaps.symbol_layer(
            frame[['lat', 'lon']], fill_color=colour,
            stroke_color=colour, scale=2
        )
        for frame, colour in layer_specs
    ]

    fig = gmaps.figure()
    for layer in layers:
        fig.add_layer(layer)
    embed_minimal_html(f'../{name}.html', views=[fig])

# Write the two-layer map to ../gmaps.html.
create_gmap(df_russia=df_russia, df_ukrain=df_ukrain, name='gmaps')

In [41]:
# trying this with map box instead
import plotly.express as px

# Scatter map of the df_ukrain rows in green, exported as standalone HTML.
# hover_data lists the location: hover_name already shows the label, so
# repeating "label" there added nothing to the tooltip.
fig = px.scatter_mapbox(df_ukrain, lat="lat", lon="lon", hover_name="label", hover_data=["location"],
                        color_discrete_sequence=["green"], zoom=3, height=500, mapbox_style="open-street-map")

fig.write_html('../ukrain.html')

In [40]:
import plotly.express as px

# Scatter map of the df_russia rows in red, exported as standalone HTML.
russia_map_options = dict(
    lat="lat",
    lon="lon",
    hover_name="label",
    hover_data=["location"],
    color_discrete_sequence=["red"],
    zoom=3,
    height=500,
    mapbox_style="open-street-map",
)
fig = px.scatter_mapbox(df_russia, **russia_map_options)

fig.write_html('../russia.html')