# The Analyzer

In [1]:
# Libraries
import json
import urllib
import folium
import requests
import pandas as pd
from IPython.display import clear_output

# Ignore warnings
# NOTE(review): this installs a blanket 'ignore' filter, so every warning
# category is silenced for all cells executed after this one. That may hide
# useful diagnostics (e.g. pandas deprecation or copy warnings) -- consider
# narrowing the filter to specific categories.
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the wine catalogue once; every later cell works from this frame.
df = pd.read_csv('wine_data.csv',
                 sep=',',
                 encoding='utf-8')

# Country column only, used to build the list of places to geocode.
# Derived from `df` instead of parsing the CSV a second time; the explicit
# copy keeps `df1` independent from later in-place changes to `df`.
df1 = df[['land']].copy()

df.head()

Unnamed: 0,typ,wein_name,land,region,produzent,rebsorte,alkohol,inhalt_cl,anzahl_fl,jahrgang,prime_start,prime_end,preis_chf,preis_cl
0,rot,Apothic Inferno Red Blend Wine with a Whiskey ...,USA,,E. & J. Gallo Winery,,16.0,75.0,1,2018,2022,2023,19.95,0.27
1,rot,Apothic Dark California Red Blend,USA,,,diverse Sorten,14.0,75.0,1,2016,2022,2024,12.95,0.17
2,rot,Antonini Monte Chiara Montepulciano d'Abruzzo DOC,Italien,Abruzzen,Montepulciano,,12.5,75.0,1,2020,2023,2024,3.75,0.05
3,rot,Angelin Langhe Nebbiolo DOC,Italien,Piemont,Angelo Negro,Nebbiolo,14.0,75.0,1,2021,2022,2026,17.5,0.23
4,rot,Amarone della Valpolicella DOCG Vigne Alte Zeni,Italien,Venetien,Cantina Zeni,Molinara,15.5,75.0,1,2019,2024,2028,32.95,0.44


In [3]:
# Keep only the rows in which no value is missing.
has_all_values = df1.notna().all(axis=1)
df2 = df1[has_all_values]
df2

Unnamed: 0,land
0,USA
1,USA
2,Italien
3,Italien
4,Italien
...,...
2655,Spanien
2656,Italien
2657,Italien
2658,Italien


In [4]:
# Collapse to one row per distinct value and renumber the index from 0.
df3 = df2.drop_duplicates().reset_index(drop=True)
df3.head()

Unnamed: 0,land
0,USA
1,Italien
2,Argentinien
3,Frankreich
4,Chile


In [5]:
import time

import requests
import urllib.parse

# Geocode every entry of df3['land'] with the public OpenStreetMap Nominatim
# service and store the coordinates of the first hit in df3['lat'] / df3['lon'].
# Entries that cannot be resolved get missing coordinates instead of aborting
# the whole run.

# Query-string form of the search endpoint (the search term is passed as `q`).
NOMINATIM_URL = 'https://nominatim.openstreetmap.org/search'
# The Nominatim usage policy asks clients to identify themselves and to send
# at most one request per second.
# TODO: replace with a User-Agent that identifies you / your project.
NOMINATIM_HEADERS = {'User-Agent': 'wine-analyzer-notebook/1.0'}
REQUEST_TIMEOUT_S = 10
MIN_REQUEST_INTERVAL_S = 1.0

geolocation = []
failed = []
countries = df3['land'].astype(str)
total = len(countries)

for position, country in enumerate(countries, start=1):

    print('Geocoding address',
          position,
          'out of',
          total,
          ':',
          country)
    clear_output(wait=True)

    try:
        # `params` lets requests do the URL encoding of the search term.
        reply = requests.get(NOMINATIM_URL,
                             params={'q': country, 'format': 'json', 'limit': 1},
                             headers=NOMINATIM_HEADERS,
                             timeout=REQUEST_TIMEOUT_S)
        reply.raise_for_status()
        best_hit = reply.json()[0]
        # Nominatim returns coordinates as strings; store them as numbers.
        geolocation.append({'lat': float(best_hit['lat']),
                            'lon': float(best_hit['lon'])})
    except (requests.RequestException, ValueError, LookupError, TypeError) as err:
        # Network/HTTP error, invalid JSON, empty result list or malformed hit:
        # keep going, but remember what failed so it can be reported below.
        failed.append((country, repr(err)))
        geolocation.append({'lat': None, 'lon': None})

    if position < total:
        time.sleep(MIN_REQUEST_INTERVAL_S)

# Report failures after the loop (inside the loop they would be wiped by
# clear_output on the next iteration).
for country, reason in failed:
    print('Could not geocode', country, '-', reason)

# Write lat and lon to df
df_loc = pd.DataFrame(geolocation,
                      columns=["lat", "lon"],
                      index=df3.index)
df3['lat'] = df_loc['lat']
df3['lon'] = df_loc['lon']
df3.head(50)

Unnamed: 0,land,lat,lon
0,USA,39.7837304,-100.445882
1,Italien,42.6384261,12.674297
2,Argentinien,-34.9964963,-64.9672817
3,Frankreich,46.603354,1.8883335
4,Chile,-31.7613365,-71.3187697
5,Portugal,39.6621648,-8.1353519
6,Schweiz,46.7985624,8.2319736
7,Australien,-24.7761086,134.755
8,Spanien,39.3260685,-4.8379791
9,Deutschland,51.1638175,10.4478313


In [6]:
# Initialise the map
m = folium.Map(location=[47.44, 8.65], zoom_start=10)

# Add one marker per geocoded entry, labelled with its 'land' value.
# Rows without coordinates (failed lookups) are skipped, because folium
# rejects marker locations that are missing / NaN.
located = df3.dropna(subset=['lat', 'lon'])
for place in located.itertuples(index=False):
    folium.Marker(location=(place.lat, place.lon),
                  popup=place.land).add_to(m)

# Layer control
folium.LayerControl().add_to(m)

# Plot map
m

In [7]:
# Attach the per-country coordinates from df3 to the wine rows.
geo_columns = ['lat', 'lon']

# Inner join used for the preview in the next cell. Any lat/lon columns left
# on `df` by a previous run of this cell are dropped first, so re-running does
# not produce suffixed duplicates (lat_x / lat_y).
merged_df = pd.merge(df.drop(columns=geo_columns, errors='ignore'), df3, on='land')

# Add lat/lon to `df` itself with one lookup per column instead of one full
# boolean-mask assignment per merged row. Rows whose 'land' has no entry in
# df3 (e.g. missing country) keep all their data and get NaN coordinates.
coords_by_land = df3.drop_duplicates(subset='land').set_index('land')
for column in geo_columns:
    df[column] = df['land'].map(coords_by_land[column])


In [8]:
# Preview the first five rows of the merged frame.
merged_df.head(n=5)

Unnamed: 0,typ,wein_name,land,region,produzent,rebsorte,alkohol,inhalt_cl,anzahl_fl,jahrgang,prime_start,prime_end,preis_chf,preis_cl,lat,lon
0,rot,Apothic Inferno Red Blend Wine with a Whiskey ...,USA,,E. & J. Gallo Winery,,16.0,75.0,1,2018,2022,2023,19.95,0.27,39.7837304,-100.445882
1,rot,Apothic Dark California Red Blend,USA,,,diverse Sorten,14.0,75.0,1,2016,2022,2024,12.95,0.17,39.7837304,-100.445882
2,rot,Bike Scorpion mit Cabernet Sauvignon,USA,,,Cabernet Sauvignon,13.0,75.0,1,2019,2023,2027,8.5,0.11,39.7837304,-100.445882
3,rot,Cabernet Sauvignon Woodbridge Robert Mondavi,USA,,Cabernet Sauvignon,,13.5,75.0,1,2018,2022,2025,9.95,0.13,39.7837304,-100.445882
4,rot,Cabernet Sauvignon Valley Oaks California Fetzer,USA,,Fetzer,Cabernet Sauvignon,13.5,75.0,1,2019,2023,2024,17.95,0.24,39.7837304,-100.445882


In [10]:
# Persist `df` as a comma-separated UTF-8 file, without the index column.
df.to_csv('wine_data_geo.csv', sep=",", encoding='utf-8', index=False)