# Get weather data

## Imports and functions

In [None]:
import pandas as pd
import numpy as np

# Import Meteostat library and dependencies
from datetime import datetime
import matplotlib.pyplot as plt
from meteostat import Point, Daily

In [None]:
def get_time_period(df):
    year = df['year']

    start_month = int(4) if int(df['location']) == 1 else int(10)
    start = datetime(year, start_month, 1)

    end_month = int(10) if int(df['location']) == 1 else int(4)
    end_day = int(31) if int(df['location']) == 1 else int(30)
    end = datetime(year, end_month, end_day)

    return (start, end)

In [None]:
def get_temp_data(df):
    for index, row in df.iterrows():
        start, end = get_time_period(row)
        row_lat = float(row['lat'])
        row_lon = float(row['lon'])
        regions = Point(row_lat,row_lon)

        data = Daily(regions, start, end)
        data = data.fetch()

        data = data[['tavg', 'tmin', 'tmax', 'prcp', 'wspd', 'wpgt', 'tsun']]

        avg_temp = data['tavg'].mean()
        min_temp = data['tmin'].mean()
        max_temp = data['tmax'].mean()
        precipitation = data['prcp'].mean()
        wind_speed = data['wspd'].mean()
        wind_peak = data['wpgt'].mean()
        sunshine = data['tsun'].mean()

        df.at[index, 'avg_temp'] = avg_temp
        df.at[index, 'min_temp'] = min_temp
        df.at[index, 'max_temp'] = max_temp
        df.at[index, 'precipitation_in_mm'] = precipitation
        df.at[index, 'wind_average_kmh'] = wind_speed
        df.at[index, 'wind_peak_kmh'] = wind_peak
        df.at[index, 'sun_minutes'] = sunshine
        
    return df

In [None]:
def fix_geo_data(df, point):
    df['lat'] = df['lat'].replace(df['lat'].iloc[0], point._lat)
    df['lon'] = df['lon'].replace(df['lon'].iloc[0], point._lon)

    return df

## Import csvs
- winemag-data-130k-v2+extract.csv
- winemag-data-130k-v2+geo.csv

In [None]:
df = pd.read_csv('../Data/winemag-data-130k-v2+extract.csv', sep=';')

In [None]:
df = df[['country','province','region_1','year']]

In [None]:
df_geo = pd.read_csv('../Data/winemag-data-130k-v2+geo.csv', sep=';', index_col=0)

In [None]:
df_geo = df_geo[['lat','lon']]

In [None]:
# will perform left join
result = df.join(df_geo)

In [None]:
result.info()

### Drop duplicates

In [None]:
result = result.drop_duplicates()

In [None]:
result.info()

### Subset - year between 1990 and 2017

In [None]:
result_year = result[(result["year"] >= 1990) & (result["year"] <= 2023)]

In [None]:
print(result_year.isnull().sum())

### Drop NA values

In [None]:
df_year = result_year[result_year['lat'].notna()]

In [None]:
df_year.reset_index(inplace=True, drop=True)

In [None]:
df_year.info()

### Fix data types

In [None]:
df_year['year'] = df_year['year'].astype('int')

In [None]:
final_tb = df_year
final_tb.info()

### Add new column "location"

In [None]:
# check if location is in north or south of the earth
lat_list = final_tb['lat']
location = []
for n in lat_list:
    if n >= 0:
        location.append(1)
    else:
        location.append(0)

final_tb['location'] = location
final_tb['location'] = final_tb['location'].astype('int')

In [None]:
final_tb.to_csv("../Data/resource_table.csv")

## Get temperature data

In [None]:
final_tb = pd.read_csv("../Data/resource_table.csv", index_col=0)

### Test with an example

In [None]:
# Create Point for Vancouver, BC
start, end = get_time_period(final_tb[7:8])
vancouver = Point(49.2497, -123.1193, 70)

In [None]:
# Get daily data for year of first_row
data = Daily(vancouver, start, end)
data = data.fetch()

In [None]:
data

In [None]:
avg_temp = data['tavg'].mean()
avg_temp

In [None]:
min_temp = data['tmin'].min()
min_temp

In [None]:
max_temp = data['tmax'].max()
max_temp

### Get data with function

In [None]:
data = final_tb[7:8]
result = get_temp_data(data)
result.info()

In [None]:
final_tb = get_temp_data(final_tb)
final_tb

In [None]:
final_tb.head(30)

#### Check null values

In [None]:
final_tb["avg_temp"].isnull().sum()

In [None]:
final_tb["sun_minutes"].isnull().sum()

In [None]:
len(final_tb)

In [None]:
final_tb.to_csv('../Data/temp_table.csv')  

## Handling missing values

In [191]:
final_tb = pd.read_csv('../Data/temp_table.csv', index_col=0) 
final_tb['location'] = final_tb['location'].astype('int')
final_tb['year'] = final_tb['year'].astype('int')

In [192]:
final_tb.isnull().sum()

country                   0
province                  0
region_1               1137
year                      0
lat                       0
lon                       0
location                  0
avg_temp               3352
min_temp               3348
max_temp               3349
precipitation_in_mm    3662
wind_average_kmh       3501
wind_peak_kmh          5021
sun_minutes            4951
dtype: int64

### 1. Country = US & Province = California
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Original geo-coordinates
        - lat = 38.628683
        - lon = -92.565963
    - New geo-coordinates
        - lat = 36.778259
        - lon = -119.417931

In [193]:
wrong_geo = final_tb[(final_tb["province"] == "California") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(36.778259,-119.417931))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [194]:
final_tb[(final_tb["country"] == "US") & (final_tb["province"] =="California")]

Unnamed: 0,country,province,region_1,year,lat,lon,location,avg_temp,min_temp,max_temp,precipitation_in_mm,wind_average_kmh,wind_peak_kmh,sun_minutes
8,US,California,Napa Valley,2011.0,36.778259,-119.417931,1.0,,,,,,,
9,US,California,Alexander Valley,2012.0,36.778259,-119.417931,1.0,,,,,,,
11,US,California,Central Coast,2012.0,36.778259,-119.417931,1.0,,,,,,,
16,US,California,Paso Robles,2011.0,36.778259,-119.417931,1.0,,,,,,,
18,US,California,Sonoma Coast,2011.0,36.778259,-119.417931,1.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5060,US,California,Contra Costa County,2008.0,36.778259,-119.417931,1.0,,,,,,,
5062,US,California,Napa-Sonoma,2008.0,36.778259,-119.417931,1.0,,,,,,,
5065,US,California,San Francisco Bay-Livermore Valley,2009.0,36.778259,-119.417931,1.0,,,,,,,
5069,US,California,San Antonio Valley,2009.0,36.778259,-119.417931,1.0,,,,,,,


### 2. Country = Italy & Province = Sicily & Sardinia
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code

    - Original geo-coordinates
        - lat = 40.047396
        - lon = 8.286983
    - New geo-coordinates
        - lat = 39.373062
        - lon = 9.157212

In [195]:
wrong_geo = final_tb[(final_tb["province"] == "Sicily & Sardinia") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(39.373062,9.157212))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [196]:
final_tb[(final_tb["province"] == "Sicily & Sardinia") & (final_tb['avg_temp'].isna() ==True)]

Unnamed: 0,country,province,region_1,year,lat,lon,location,avg_temp,min_temp,max_temp,precipitation_in_mm,wind_average_kmh,wind_peak_kmh,sun_minutes
0,test,Sicily & Sardinia,Etna,2013.0,39.373062,9.157212,1.0,,,,,,,
5,Italy,Sicily & Sardinia,Vittoria,2013.0,39.373062,9.157212,1.0,,,,,,,
10,Italy,Sicily & Sardinia,Etna,2012.0,39.373062,9.157212,1.0,,,,,,,
15,Italy,Sicily & Sardinia,Sicilia,2007.0,39.373062,9.157212,1.0,,,,,,,
17,Italy,Sicily & Sardinia,Sicilia,2009.0,39.373062,9.157212,1.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4934,Italy,Sicily & Sardinia,Alghero,2010.0,39.373062,9.157212,1.0,,,,,,,
4960,Italy,Sicily & Sardinia,Erice,2013.0,39.373062,9.157212,1.0,,,,,,,
4966,Italy,Sicily & Sardinia,Malvasia delle Lipari,2010.0,39.373062,9.157212,1.0,,,,,,,
5029,Italy,Sicily & Sardinia,Contea di Sclafani,2014.0,39.373062,9.157212,1.0,,,,,,,


### 3. Country = Australia & Province = South Australia
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code

    - Original geo-coordinates
        - lat = -34.741121
        - lon = 138.656437
    - New geo-coordinates
        - lat = 39.373062
        - lon = 9.157212

In [197]:
wrong_geo = final_tb[(final_tb["province"] == "South Australia") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(-34.064999, 150.814163))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### 4. Country = Portugal & Province = Tejo
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code

    - Original geo-coordinates
        - lat = 39.409227
        - lon = -8.205827
    - New geo-coordinates
        - lat = 38.74908340
        - lon = -9.13983090

In [198]:
wrong_geo = final_tb[(final_tb["province"] == "Tejo") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(38.74908340,-9.13983090))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [None]:
final_tb[(final_tb["province"] == "Tejo") & (final_tb['avg_temp'].isna() ==True)]

### 5. Country = Greece & Province = Naoussa
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted

In [199]:
wrong_geo = final_tb[(final_tb["province"] == "Naoussa") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(40.6308695, 40.428022))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### 6. Country = Greece & Province = Nemea
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted

In [200]:
wrong_geo = final_tb[(final_tb["country"] == "Greece") & (final_tb["province"] == "Nemea")]
fix_geo_data(wrong_geo, Point(37.955894,23.702099))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [None]:
final_tb[(final_tb["country"] == "Greece") & (final_tb["province"] == "Nemea")]

### 7. Country = Austria & Province = Kremstal & Niederösterreich & Eisenberg
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted

In [201]:
wrong_geo = final_tb[(final_tb["province"] == "Kremstal") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(48.409990,15.603840))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [202]:
wrong_geo = final_tb[(final_tb["province"] == "Eisenberg") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(46.903996384, 16.138499446))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [203]:
wrong_geo = final_tb[(final_tb["province"] == "Niederösterreich") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(48.33, 15.75))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### 8. Country = Spain & Province = Catalonia & Andalucia
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted

In [204]:
wrong_geo = final_tb[(final_tb["province"] == "Catalonia") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(41.390205,2.154007))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [205]:
final_tb[(final_tb["province"] == "Andalucia") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(37.178055,-3.600833))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### 9. Country = Italy & Province = Veneto
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted

In [206]:
wrong_geo = final_tb[(final_tb["province"] == "Veneto") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(45.666668,12.250000))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### 10. Country = France & Province = Champagne
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted

In [207]:
wrong_geo = final_tb[(final_tb["province"] == "Champagne") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(48.026628, 0.333235))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### 11. Country = US & Province = Arizona & Connecticut & Iowa & New Mexico & Texas & Vermont
- Check, whether the given geo-coordinates work. If it doesn't work, we should find another geo-coordinates and run the temperature code
    - Geo-coordinates have been adjusted


In [208]:
wrong_geo = final_tb[(final_tb["province"] == "Arizona") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(34.048927,-111.093735))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [209]:
wrong_geo = final_tb[(final_tb["province"] == "Connecticut") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(41.599998,-72.699997))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [210]:
wrong_geo = final_tb[(final_tb["province"] == "Iowa") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(29.749907, -95.358421))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [211]:
wrong_geo = final_tb[(final_tb["province"] == "New Mexico") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(35.106766,-106.629181))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [212]:
wrong_geo = final_tb[(final_tb["province"] == "Texas") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(42.032974,-93.581543))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [213]:
wrong_geo = final_tb[(final_tb["province"] == "Vermont") & (final_tb['avg_temp'].isna() ==True)]
fix_geo_data(wrong_geo, Point(44.000000,-72.699997))
final_tb.update(wrong_geo)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [214]:
final_tb['location'] = final_tb['location'].astype('int')
final_tb['year'] = final_tb['year'].astype('int')
final_tb.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5074 entries, 0 to 5073
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   country              5074 non-null   object 
 1   province             5074 non-null   object 
 2   region_1             3937 non-null   object 
 3   year                 5074 non-null   int32  
 4   lat                  5074 non-null   float64
 5   lon                  5074 non-null   float64
 6   location             5074 non-null   int32  
 7   avg_temp             1722 non-null   float64
 8   min_temp             1726 non-null   float64
 9   max_temp             1725 non-null   float64
 10  precipitation_in_mm  1412 non-null   float64
 11  wind_average_kmh     1573 non-null   float64
 12  wind_peak_kmh        53 non-null     float64
 13  sun_minutes          123 non-null    float64
dtypes: float64(9), int32(2), object(3)
memory usage: 555.0+ KB


In [216]:
final_tb = get_temp_data(final_tb)



In [217]:
final_tb.isnull().sum()

country                   0
province                  0
region_1               1137
year                      0
lat                       0
lon                       0
location                  0
avg_temp               1051
min_temp               1047
max_temp               1048
precipitation_in_mm    1393
wind_average_kmh       1274
wind_peak_kmh          5000
sun_minutes            4741
dtype: int64

In [218]:
len(final_tb)

5074

### Save final table as csv

In [219]:
#from pathlib import Path
final_tb.to_csv('../Data/temp_table.csv')