# Get weather data

## Imports and functions

In [1]:
import pandas as pd
import numpy as np

# Import Meteostat library and dependencies
from datetime import datetime
import matplotlib.pyplot as plt
from meteostat import Point, Daily

In [2]:
def get_time_period(df):
    year = df['year']

    start_month = int(4) if int(df['location']) == 1 else int(10)
    start = datetime(year, start_month, 1)

    end_month = int(10) if int(df['location']) == 1 else int(4)
    end_day = int(31) if int(df['location']) == 1 else int(30)
    end = datetime(year, end_month, end_day)

    return (start, end)

In [3]:
def get_temp_data(df):
    for index, row in df.iterrows():
        start, end = get_time_period(row)
        row_lat = float(row['lat'])
        row_lon = float(row['lon'])
        regions = Point(row_lat,row_lon)

        data = Daily(regions, start, end)
        data = data.fetch()

        data = data[['tavg', 'tmin', 'tmax', 'prcp', 'wspd', 'wpgt', 'tsun']]

        avg_temp = data['tavg'].mean()
        min_temp = data['tmin'].mean()
        max_temp = data['tmax'].mean()
        precipitation = data['prcp'].mean()
        wind_speed = data['wspd'].mean()
        wind_peak = data['wpgt'].mean()
        sunshine = data['tsun'].mean()

        df.at[index, 'avg_temp'] = avg_temp
        df.at[index, 'min_temp'] = min_temp
        df.at[index, 'max_temp'] = max_temp
        df.at[index, 'precipitation_in_mm'] = precipitation
        df.at[index, 'wind_average_kmh'] = wind_speed
        df.at[index, 'wind_peak_kmh'] = wind_peak
        df.at[index, 'sun_minutes'] = sunshine
        
    return df

In [4]:
def fix_geo_data(df, point):
    df['lat'] = df['lat'].replace(df['lat'].iloc[0], point._lat)
    df['lon'] = df['lon'].replace(df['lon'].iloc[0], point._lon)

    return df

## Import csvs
- winemag-data-130k-v2+extract.csv
- winemag-data-130k-v2+geo.csv

In [5]:
df = pd.read_csv('../Data/winemag-data-130k-v2+extract.csv', sep=';')

In [6]:
df = df[['country','province','region_1','year']]

In [7]:
df_geo = pd.read_csv('../Data/winemag-data-130k-v2+geo.csv', sep=';', index_col=0)

In [8]:
df_geo = df_geo[['lat','lon']]

In [9]:
# will perform left join
result = df.join(df_geo)

In [10]:
result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 129971 entries, 0 to 129970
Data columns (total 6 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   country   129908 non-null  object 
 1   province  129908 non-null  object 
 2   region_1  108724 non-null  object 
 3   year      129931 non-null  float64
 4   lat       87966 non-null   float64
 5   lon       87966 non-null   float64
dtypes: float64(3), object(3)
memory usage: 5.9+ MB


### Drop duplicates

In [11]:
result = result.drop_duplicates()

In [12]:
result.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 11683 entries, 0 to 129952
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   country   11682 non-null  object 
 1   province  11682 non-null  object 
 2   region_1  9199 non-null   object 
 3   year      11656 non-null  float64
 4   lat       6211 non-null   float64
 5   lon       6211 non-null   float64
dtypes: float64(3), object(3)
memory usage: 638.9+ KB


### Subset - year between 1990 and 2017

In [13]:
result_year = result[(result["year"] >= 1990) & (result["year"] <= 2023)]

In [14]:
print(result_year.isnull().sum())

country        0
province       0
region_1    2132
year           0
lat         4770
lon         4770
dtype: int64


### Drop NA values

In [15]:
df_year = result_year[result_year['lat'].notna()]

In [16]:
df_year.reset_index(inplace=True, drop=True)

In [17]:
df_year.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5074 entries, 0 to 5073
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   country   5074 non-null   object 
 1   province  5074 non-null   object 
 2   region_1  3937 non-null   object 
 3   year      5074 non-null   float64
 4   lat       5074 non-null   float64
 5   lon       5074 non-null   float64
dtypes: float64(3), object(3)
memory usage: 238.0+ KB


### Fix data types

In [18]:
df_year['year'] = df_year['year'].astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [19]:
final_tb = df_year
final_tb.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5074 entries, 0 to 5073
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   country   5074 non-null   object 
 1   province  5074 non-null   object 
 2   region_1  3937 non-null   object 
 3   year      5074 non-null   int32  
 4   lat       5074 non-null   float64
 5   lon       5074 non-null   float64
dtypes: float64(2), int32(1), object(3)
memory usage: 218.1+ KB


### Add new column "location"

In [20]:
# check if location is in north or south of the earth
lat_list = final_tb['lat']
location = []
for n in lat_list:
    if n >= 0:
        location.append(1)
    else:
        location.append(0)

final_tb['location'] = location
final_tb['location'] = final_tb['location'].astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [21]:
final_tb.to_csv("../Data/resource_table.csv")

PermissionError: [Errno 13] Permission denied: '../Data/resource_table.csv'

## Get temperature data

In [22]:
final_tb = pd.read_csv("../Data/resource_table.csv", index_col=0)

### Test with an example

In [23]:
# Create Point for Vancouver, BC
start, end = get_time_period(final_tb[7:8])
vancouver = Point(49.2497, -123.1193, 70)



In [24]:
# Get daily data for year of first_row
data = Daily(vancouver, start, end)
data = data.fetch()



In [25]:
data

Unnamed: 0_level_0,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2013-04-01,11.9,7.0,16.7,0.0,0.0,,14.7,,1017.5,
2013-04-02,11.1,9.6,12.5,0.0,0.0,130.0,13.0,,1022.0,
2013-04-03,10.2,6.5,13.8,0.0,0.0,,8.0,,1020.6,
2013-04-04,10.4,8.6,12.1,22.0,0.0,,7.0,,1007.8,
2013-04-05,9.7,8.6,10.7,14.8,0.0,130.0,16.5,,1007.3,
...,...,...,...,...,...,...,...,...,...,...
2013-10-27,11.4,8.5,14.3,1.6,,,13.3,,1016.6,
2013-10-28,11.4,7.5,15.3,0.0,,,9.6,,1016.5,
2013-10-29,8.0,3.9,12.1,0.0,,,8.7,,1016.0,
2013-10-30,9.8,6.5,13.0,4.0,0.0,91.0,14.6,,1018.8,


In [26]:
avg_temp = data['tavg'].mean()
avg_temp

14.715420560747665

In [27]:
min_temp = data['tmin'].min()
min_temp

2.1

In [28]:
max_temp = data['tmax'].max()
max_temp

30.8

### Get data with function

In [29]:
data = final_tb[7:8]
result = get_temp_data(data)
result.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1 entries, 7 to 7
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   index                1 non-null      int64  
 1   country              1 non-null      object 
 2   province             1 non-null      object 
 3   region_1             0 non-null      object 
 4   year                 1 non-null      int64  
 5   lat                  1 non-null      float64
 6   lon                  1 non-null      float64
 7   location             1 non-null      int64  
 8   avg_temp             1 non-null      float64
 9   min_temp             1 non-null      float64
 10  max_temp             1 non-null      float64
 11  precipitation_in_mm  1 non-null      float64
 12  wind_average_kmh     1 non-null      float64
 13  wind_peak_kmh        0 non-null      float64
 14  sun_minutes          0 non-null      float64
dtypes: float64(9), int64(3), object(3)
memory us

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [30]:
final_tb = get_temp_data(final_tb)
final_tb



Unnamed: 0,index,country,province,region_1,year,lat,lon,location,avg_temp,min_temp,max_temp,precipitation_in_mm,wind_average_kmh,wind_peak_kmh,sun_minutes
0,0,Italy,Sicily & Sardinia,Etna,2013,40.047396,8.286983,1,,,,,,,
1,1,Portugal,Douro,,2011,39.886265,-8.268731,1,19.021028,13.725503,26.029717,9.736842,,,
2,2,US,Oregon,Willamette Valley,2013,45.420675,-122.670649,1,16.361215,11.382710,22.425234,1.881776,9.025701,,
3,3,US,Michigan,Lake Michigan Shore,2013,41.707539,-86.895030,1,16.006103,10.642254,21.388732,0.778358,8.595283,,
4,4,US,Oregon,Willamette Valley,2012,45.420675,-122.670649,1,16.184579,11.049533,22.211215,2.032710,9.443458,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5069,129618,US,California,San Antonio Valley,2009,38.628683,-92.565963,1,,,,,,,
5070,129820,Greece,Goumenissa,,2008,40.946644,22.451941,1,,,,,,,
5071,129933,Austria,Austria,,2005,47.835690,14.221708,1,14.865888,10.711215,20.398131,3.568224,,,
5072,129935,US,Washington,Columbia Valley-Walla Walla Valley,2006,38.895037,-77.036543,1,20.551174,15.824299,25.646729,4.190654,13.424299,,


In [31]:
final_tb.head(30)

Unnamed: 0,index,country,province,region_1,year,lat,lon,location,avg_temp,min_temp,max_temp,precipitation_in_mm,wind_average_kmh,wind_peak_kmh,sun_minutes
0,0,Italy,Sicily & Sardinia,Etna,2013,40.047396,8.286983,1,,,,,,,
1,1,Portugal,Douro,,2011,39.886265,-8.268731,1,19.021028,13.725503,26.029717,9.736842,,,
2,2,US,Oregon,Willamette Valley,2013,45.420675,-122.670649,1,16.361215,11.38271,22.425234,1.881776,9.025701,,
3,3,US,Michigan,Lake Michigan Shore,2013,41.707539,-86.89503,1,16.006103,10.642254,21.388732,0.778358,8.595283,,
4,4,US,Oregon,Willamette Valley,2012,45.420675,-122.670649,1,16.184579,11.049533,22.211215,2.03271,9.443458,,
5,6,Italy,Sicily & Sardinia,Vittoria,2013,40.047396,8.286983,1,,,,,,,
6,7,France,Alsace,Alsace,2012,48.684873,2.175608,1,14.992991,11.049533,19.220093,2.13972,11.940952,,
7,8,Germany,Rheinhessen,,2013,49.904518,8.271108,1,13.610748,9.766355,17.928037,1.964019,8.81934,,
8,10,US,California,Napa Valley,2011,38.628683,-92.565963,1,,,,,,,
9,12,US,California,Alexander Valley,2012,38.628683,-92.565963,1,,,,,,,


#### Check null values

In [32]:
final_tb["avg_temp"].isnull().sum()

3352

In [33]:
final_tb["sun_minutes"].isnull().sum()

4951

In [34]:
len(final_tb)

5074

In [35]:
final_tb.to_csv('../Data/temp_table.csv')  

## Handling missing values

In [36]:
final_tb = pd.read_csv('../Data/temp_table.csv', index_col=0) 
final_tb['location'] = final_tb['location'].astype('int')
final_tb['year'] = final_tb['year'].astype('int')

In [37]:
final_tb.isnull().sum()

index                     0
country                   0
province                  0
region_1               1137
year                      0
lat                       0
lon                       0
location                  0
avg_temp               3352
min_temp               3348
max_temp               3349
precipitation_in_mm    3662
wind_average_kmh       3501
wind_peak_kmh          5021
sun_minutes            4951
dtype: int64

### 1. Country = US & Province = California
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Original geo-coordinates
        - lat = 38.628683
        - lon = -92.565963
    - New geo-coordinates
        - lat = 36.778259
        - lon = -119.417931

In [38]:
wrong_geo = final_tb[(final_tb["province"] == "California") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(36.778259,-119.417931))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [39]:
final_tb[(final_tb["country"] == "US") & (final_tb["province"] =="California")]

Unnamed: 0,index,country,province,region_1,year,lat,lon,location,avg_temp,min_temp,max_temp,precipitation_in_mm,wind_average_kmh,wind_peak_kmh,sun_minutes
8,10.0,US,California,Napa Valley,2011.0,36.778259,-119.417931,1.0,,,,,,,
9,12.0,US,California,Alexander Valley,2012.0,36.778259,-119.417931,1.0,,,,,,,
11,14.0,US,California,Central Coast,2012.0,36.778259,-119.417931,1.0,,,,,,,
16,23.0,US,California,Paso Robles,2011.0,36.778259,-119.417931,1.0,,,,,,,
18,25.0,US,California,Sonoma Coast,2011.0,36.778259,-119.417931,1.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5060,128256.0,US,California,Contra Costa County,2008.0,36.778259,-119.417931,1.0,,,,,,,
5062,128986.0,US,California,Napa-Sonoma,2008.0,36.778259,-119.417931,1.0,,,,,,,
5065,129214.0,US,California,San Francisco Bay-Livermore Valley,2009.0,36.778259,-119.417931,1.0,,,,,,,
5069,129618.0,US,California,San Antonio Valley,2009.0,36.778259,-119.417931,1.0,,,,,,,


### 2. Country = Italy & Province = Sicily & Sardinia
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code

    - Original geo-coordinates
        - lat = 40.047396
        - lon = 8.286983
    - New geo-coordinates
        - lat = 39.373062
        - lon = 9.157212

In [40]:
wrong_geo = final_tb[(final_tb["province"] == "Sicily & Sardinia") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(39.373062,9.157212))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [41]:
final_tb[(final_tb["province"] == "Sicily & Sardinia") & (final_tb['avg_temp'].isna() ==True)]

Unnamed: 0,index,country,province,region_1,year,lat,lon,location,avg_temp,min_temp,max_temp,precipitation_in_mm,wind_average_kmh,wind_peak_kmh,sun_minutes
0,0.0,Italy,Sicily & Sardinia,Etna,2013.0,39.373062,9.157212,1.0,,,,,,,
5,6.0,Italy,Sicily & Sardinia,Vittoria,2013.0,39.373062,9.157212,1.0,,,,,,,
10,13.0,Italy,Sicily & Sardinia,Etna,2012.0,39.373062,9.157212,1.0,,,,,,,
15,22.0,Italy,Sicily & Sardinia,Sicilia,2007.0,39.373062,9.157212,1.0,,,,,,,
17,24.0,Italy,Sicily & Sardinia,Sicilia,2009.0,39.373062,9.157212,1.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4934,117050.0,Italy,Sicily & Sardinia,Alghero,2010.0,39.373062,9.157212,1.0,,,,,,,
4960,119400.0,Italy,Sicily & Sardinia,Erice,2013.0,39.373062,9.157212,1.0,,,,,,,
4966,120030.0,Italy,Sicily & Sardinia,Malvasia delle Lipari,2010.0,39.373062,9.157212,1.0,,,,,,,
5029,125237.0,Italy,Sicily & Sardinia,Contea di Sclafani,2014.0,39.373062,9.157212,1.0,,,,,,,


### 3. Country = Australia & Province = South Australia
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code

    - Original geo-coordinates
        - lat = -34.741121
        - lon = 138.656437
    - New geo-coordinates
        - lat = 39.373062
        - lon = 9.157212

In [42]:
wrong_geo = final_tb[(final_tb["province"] == "South Australia") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(-34.064999, 150.814163))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### 4. Country = Portugal & Province = Tejo
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code

    - Original geo-coordinates
        - lat = 39.409227
        - lon = -8.205827
    - New geo-coordinates
        - lat = 38.74908340
        - lon = -9.13983090

In [43]:
wrong_geo = final_tb[(final_tb["province"] == "Tejo") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(38.74908340,-9.13983090))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [44]:
final_tb[(final_tb["province"] == "Tejo") & (final_tb['avg_temp'].isna() ==True)]

Unnamed: 0,index,country,province,region_1,year,lat,lon,location,avg_temp,min_temp,max_temp,precipitation_in_mm,wind_average_kmh,wind_peak_kmh,sun_minutes
50,79.0,Portugal,Tejo,,2014.0,38.749083,-9.139831,1.0,,,,,,,
299,646.0,Portugal,Tejo,,2013.0,38.749083,-9.139831,1.0,,,,,,,
395,921.0,Portugal,Tejo,,2015.0,38.749083,-9.139831,1.0,,,,,,,
606,1617.0,Portugal,Tejo,,2016.0,38.749083,-9.139831,1.0,,,,,,,
697,2033.0,Portugal,Tejo,,2012.0,38.749083,-9.139831,1.0,,,,,,,
824,2652.0,Portugal,Tejo,,2009.0,38.749083,-9.139831,1.0,,,,,,,
865,2866.0,Portugal,Tejo,,2011.0,38.749083,-9.139831,1.0,,,,,,,
1232,4916.0,Portugal,Tejo,,2010.0,38.749083,-9.139831,1.0,,,,,,,
2290,15028.0,Portugal,Tejo,,2008.0,38.749083,-9.139831,1.0,,,,,,,
2956,27267.0,Portugal,Tejo,,2007.0,38.749083,-9.139831,1.0,,,,,,,


### 5. Country = Greece & Province = Naoussa
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted

In [45]:
wrong_geo = final_tb[(final_tb["province"] == "Naoussa") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(40.6308695, 40.428022))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### 6. Country = Greece & Province = Nemea
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted

In [46]:
wrong_geo = final_tb[(final_tb["country"] == "Greece") & (final_tb["province"] == "Nemea")]
fix_geo_data(wrong_geo, Point(37.955894,23.702099))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [47]:
final_tb[(final_tb["country"] == "Greece") & (final_tb["province"] == "Nemea")]

Unnamed: 0,index,country,province,region_1,year,lat,lon,location,avg_temp,min_temp,max_temp,precipitation_in_mm,wind_average_kmh,wind_peak_kmh,sun_minutes
732,2166.0,Greece,Nemea,,2012.0,37.955894,23.702099,1.0,,,,,,,
1211,4794.0,Greece,Nemea,,2008.0,37.955894,23.702099,1.0,,,,,,,
1804,9345.0,Greece,Nemea,,2004.0,37.955894,23.702099,1.0,,,,,,,
1908,10489.0,Greece,Nemea,,2011.0,37.955894,23.702099,1.0,,,,,,,
2272,14840.0,Greece,Nemea,,2005.0,37.955894,23.702099,1.0,,,,,,,
3113,30760.0,Greece,Nemea,,2006.0,37.955894,23.702099,1.0,,,,,,,
3202,32517.0,Greece,Nemea,,2007.0,37.955894,23.702099,1.0,,,,,,,
3237,33439.0,Greece,Nemea,,2010.0,37.955894,23.702099,1.0,,,,,,,
3270,34283.0,Greece,Nemea,,2009.0,37.955894,23.702099,1.0,,,,,,,
3619,44938.0,Greece,Nemea,,2003.0,37.955894,23.702099,1.0,,,,,,,


### 7. Country = Austria & Province = Kremstal & Niederösterreich & Eisenberg
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted

In [48]:
wrong_geo = final_tb[(final_tb["province"] == "Kremstal") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(48.409990,15.603840))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [49]:
wrong_geo = final_tb[(final_tb["province"] == "Eisenberg") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(46.903996384, 16.138499446))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [50]:
wrong_geo = final_tb[(final_tb["province"] == "Niederösterreich") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(48.33, 15.75))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### 8. Country = Spain & Province = Catalonia & Andalucia
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted

In [51]:
wrong_geo = final_tb[(final_tb["province"] == "Catalonia") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(41.390205,2.154007))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [52]:
wrong_geo = final_tb[(final_tb["province"] == "Andalucia") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(37.178055,-3.600833))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### 9. Country = Italy & Province = Veneto
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted

In [53]:
wrong_geo = final_tb[(final_tb["province"] == "Veneto") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(45.666668,12.250000))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### 10. Country = France & Province = Champagne
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted

In [54]:
wrong_geo = final_tb[(final_tb["province"] == "Champagne") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(48.026628, 0.333235))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### 11. Country = US & Province = Arizona & Connecticut & Iowa & New Mexico & Texas & Vermont
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted


In [55]:
wrong_geo = final_tb[(final_tb["province"] == "Arizona") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(34.048927,-111.093735))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [56]:
wrong_geo = final_tb[(final_tb["province"] == "Connecticut") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(41.599998,-72.699997))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [57]:
wrong_geo = final_tb[(final_tb["province"] == "Iowa") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(29.749907, -95.358421))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [58]:
wrong_geo = final_tb[(final_tb["province"] == "New Mexico") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(35.106766,-106.629181))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [59]:
wrong_geo = final_tb[(final_tb["province"] == "Texas") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(42.032974,-93.581543))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [60]:
wrong_geo = final_tb[(final_tb["province"] == "Vermont") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(44.000000,-72.699997))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [61]:
final_tb['location'] = final_tb['location'].astype('int')
final_tb['year'] = final_tb['year'].astype('int')
final_tb.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5074 entries, 0 to 5073
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   index                5074 non-null   float64
 1   country              5074 non-null   object 
 2   province             5074 non-null   object 
 3   region_1             3937 non-null   object 
 4   year                 5074 non-null   int32  
 5   lat                  5074 non-null   float64
 6   lon                  5074 non-null   float64
 7   location             5074 non-null   int32  
 8   avg_temp             1722 non-null   float64
 9   min_temp             1726 non-null   float64
 10  max_temp             1725 non-null   float64
 11  precipitation_in_mm  1412 non-null   float64
 12  wind_average_kmh     1573 non-null   float64
 13  wind_peak_kmh        53 non-null     float64
 14  sun_minutes          123 non-null    float64
dtypes: float64(10), int32(2), object(3)
me

In [62]:
final_tb = get_temp_data(final_tb)

In [63]:
final_tb.isnull().sum()

index                     0
country                   0
province                  0
region_1               1137
year                      0
lat                       0
lon                       0
location                  0
avg_temp               1030
min_temp               1026
max_temp               1027
precipitation_in_mm    1372
wind_average_kmh       1226
wind_peak_kmh          5000
sun_minutes            4741
dtype: int64

In [64]:
len(final_tb)

5074

### Save final table as csv

In [66]:
#from pathlib import Path
final_tb.to_csv('../Data/temp_table.csv')