In [1]:
import numpy as np
import pandas as pd
import torch
import datetime    

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.utils.validation import column_or_1d

In [2]:
df_city = pd.read_csv("city_attributes.csv")
df_humidity = pd.read_csv("humidity.csv")
df_pressure = pd.read_csv("pressure.csv")
df_temper = pd.read_csv("temperature.csv")
df_descript = pd.read_csv("weather_description.csv")
df_direction = pd.read_csv("wind_direction.csv")
df_speed = pd.read_csv("wind_speed.csv")

In [3]:
def group_by_days(df):
    df["date"] = pd.to_datetime(df["datetime"]).dt.date
    df = df.drop('datetime', axis=1)
    df = df.groupby(df["date"]).mean()
    return df

def group_by_days_descript(df):
    df["date"] = pd.to_datetime(df["datetime"]).dt.date
    df = df.fillna('no data')
    df = df.drop('datetime', axis=1)
    df = df.groupby(df["date"]).agg(lambda x: pd.Series.mode(x)[0])
    return df

In [4]:
df_humidity = group_by_days(df_humidity)
df_pressure = group_by_days(df_pressure)
df_temper = group_by_days(df_temper)
df_descript = group_by_days_descript(df_descript)
df_direction = group_by_days(df_direction)
df_speed = group_by_days(df_speed)

In [5]:
encoder = LabelEncoder()
df_descript= df_descript.apply(encoder.fit_transform)

In [6]:
def create_dataframe_for_city(city_name):
    df =  pd.concat([
        df_humidity[city_name], df_pressure[city_name], df_temper[city_name], df_descript[city_name], df_direction[city_name], df_speed[city_name]
    ], axis=1)
    df.columns = ['humidity', 'pressure', 'temperature', 'description', 'wind_direction','wind_speed' ]
    return df

In [7]:
df_Portland  =  create_dataframe_for_city("Portland")

In [8]:
df_Portland

Unnamed: 0_level_0,humidity,pressure,temperature,description,wind_direction,wind_speed
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2012-10-01,78.727273,1024.000000,282.118197,11,57.727273,0.000000
2012-10-02,65.833333,1023.583333,286.137728,11,214.041667,1.291667
2012-10-03,66.208333,1021.083333,289.599792,2,228.333333,2.625000
2012-10-04,51.166667,1022.875000,286.482500,12,206.750000,4.625000
2012-10-05,40.391304,1022.916667,288.286042,12,182.250000,3.708333
...,...,...,...,...,...,...
2017-11-26,87.625000,1004.500000,281.588333,8,153.750000,4.958333
2017-11-27,92.875000,1015.625000,279.922500,8,158.083333,1.666667
2017-11-28,88.458333,1019.791667,280.085833,8,144.166667,3.041667
2017-11-29,85.500000,1027.625000,279.860833,8,191.125000,2.166667


# Przewidywanie temperatury

In [9]:
X = df_Portland.iloc[:-2] 
y =  df_Portland['temperature'].iloc[4:] 

X.reset_index(inplace= True)
X['day_of_year'] = pd.to_datetime(X['date']).dt.dayofyear
X = X.drop('date', axis=1)
#X = X.drop('humidity', axis=1)
#X = X.drop('pressure', axis=1)
#X = X.drop('description', axis=1)
#X = X.drop('wind_direction', axis=1)
#X = X.drop('wind_speed', axis=1)
X= pd.concat([X.iloc[:-2].reset_index(drop=True),X.iloc[1:-1].reset_index(drop=True),X.iloc[2:].reset_index(drop=True)],axis = 1)
X

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['day_of_year'] = pd.to_datetime(X['date']).dt.dayofyear


Unnamed: 0,humidity,pressure,temperature,description,wind_direction,wind_speed,day_of_year,humidity.1,pressure.1,temperature.1,...,wind_direction.1,wind_speed.1,day_of_year.1,humidity.2,pressure.2,temperature.2,description.1,wind_direction.2,wind_speed.2,day_of_year.2
0,78.727273,1024.000000,282.118197,11,57.727273,0.000000,275,65.833333,1023.583333,286.137728,...,214.041667,1.291667,276,66.208333,1021.083333,289.599792,2,228.333333,2.625000,277
1,65.833333,1023.583333,286.137728,11,214.041667,1.291667,276,66.208333,1021.083333,289.599792,...,228.333333,2.625000,277,51.166667,1022.875000,286.482500,12,206.750000,4.625000,278
2,66.208333,1021.083333,289.599792,2,228.333333,2.625000,277,51.166667,1022.875000,286.482500,...,206.750000,4.625000,278,40.391304,1022.916667,288.286042,12,182.250000,3.708333,279
3,51.166667,1022.875000,286.482500,12,206.750000,4.625000,278,40.391304,1022.916667,288.286042,...,182.250000,3.708333,279,40.750000,1023.333333,288.291042,12,134.041667,3.666667,280
4,40.391304,1022.916667,288.286042,12,182.250000,3.708333,279,40.750000,1023.333333,288.291042,...,134.041667,3.666667,280,36.681818,1021.000000,288.262500,12,119.166667,4.500000,281
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1878,94.083333,1016.791667,285.447917,8,120.000000,5.583333,326,88.250000,1013.833333,288.386667,...,174.166667,4.666667,327,83.083333,1019.166667,282.543750,0,199.583333,2.791667,328
1879,88.250000,1013.833333,288.386667,7,174.166667,4.666667,327,83.083333,1019.166667,282.543750,...,199.583333,2.791667,328,87.291667,1017.541667,279.293333,8,140.500000,2.500000,329
1880,83.083333,1019.166667,282.543750,0,199.583333,2.791667,328,87.291667,1017.541667,279.293333,...,140.500000,2.500000,329,87.625000,1004.500000,281.588333,8,153.750000,4.958333,330
1881,87.291667,1017.541667,279.293333,8,140.500000,2.500000,329,87.625000,1004.500000,281.588333,...,153.750000,4.958333,330,92.875000,1015.625000,279.922500,8,158.083333,1.666667,331


In [10]:
X_train, X_test, y_train, y_test = train_test_split(
           X, y, test_size = 0.3, random_state = 0,shuffle=False)

In [11]:
net = MLPRegressor(random_state=0, max_iter=1000, activation ='relu', learning_rate = 'adaptive', learning_rate_init = 0.00001, hidden_layer_sizes=1000)

In [12]:
net.fit(X_train, y_train)



In [13]:
def abs_error(A,B):
    err = 0
    for i in range(len(A)):
        err = max(err, abs(A.iloc[i] - B[i]))
    return err

In [14]:
y_pred1 = net.predict(X_train)
err = abs_error(y_train, y_pred1)
print(err)

12.988531810148515


In [15]:
y_pred2 = net.predict(X_test)
err = abs_error(y_test, y_pred2)
print(err)

12.067587376166784


## Przewidywanie siły wiatru

In [16]:
X = df_Portland.iloc[:-2] 
y =  df_Portland['wind_speed'].iloc[4:] 

X.reset_index(inplace= True)
X['day_of_year'] = pd.to_datetime(X['date']).dt.dayofyear
X = X.drop('date', axis=1)
X= pd.concat([X.iloc[:-2].reset_index(drop=True),X.iloc[1:-1].reset_index(drop=True),X.iloc[2:].reset_index(drop=True)],axis = 1)
X

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['day_of_year'] = pd.to_datetime(X['date']).dt.dayofyear


Unnamed: 0,humidity,pressure,temperature,description,wind_direction,wind_speed,day_of_year,humidity.1,pressure.1,temperature.1,...,wind_direction.1,wind_speed.1,day_of_year.1,humidity.2,pressure.2,temperature.2,description.1,wind_direction.2,wind_speed.2,day_of_year.2
0,78.727273,1024.000000,282.118197,11,57.727273,0.000000,275,65.833333,1023.583333,286.137728,...,214.041667,1.291667,276,66.208333,1021.083333,289.599792,2,228.333333,2.625000,277
1,65.833333,1023.583333,286.137728,11,214.041667,1.291667,276,66.208333,1021.083333,289.599792,...,228.333333,2.625000,277,51.166667,1022.875000,286.482500,12,206.750000,4.625000,278
2,66.208333,1021.083333,289.599792,2,228.333333,2.625000,277,51.166667,1022.875000,286.482500,...,206.750000,4.625000,278,40.391304,1022.916667,288.286042,12,182.250000,3.708333,279
3,51.166667,1022.875000,286.482500,12,206.750000,4.625000,278,40.391304,1022.916667,288.286042,...,182.250000,3.708333,279,40.750000,1023.333333,288.291042,12,134.041667,3.666667,280
4,40.391304,1022.916667,288.286042,12,182.250000,3.708333,279,40.750000,1023.333333,288.291042,...,134.041667,3.666667,280,36.681818,1021.000000,288.262500,12,119.166667,4.500000,281
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1878,94.083333,1016.791667,285.447917,8,120.000000,5.583333,326,88.250000,1013.833333,288.386667,...,174.166667,4.666667,327,83.083333,1019.166667,282.543750,0,199.583333,2.791667,328
1879,88.250000,1013.833333,288.386667,7,174.166667,4.666667,327,83.083333,1019.166667,282.543750,...,199.583333,2.791667,328,87.291667,1017.541667,279.293333,8,140.500000,2.500000,329
1880,83.083333,1019.166667,282.543750,0,199.583333,2.791667,328,87.291667,1017.541667,279.293333,...,140.500000,2.500000,329,87.625000,1004.500000,281.588333,8,153.750000,4.958333,330
1881,87.291667,1017.541667,279.293333,8,140.500000,2.500000,329,87.625000,1004.500000,281.588333,...,153.750000,4.958333,330,92.875000,1015.625000,279.922500,8,158.083333,1.666667,331


In [17]:
y = pd.DataFrame(y)
y['wind_is_strong'] = y['wind_speed'].apply(lambda x: 1 if x >= 6 else 0)
y = y.drop('wind_speed', axis=1)
y = column_or_1d(y, warn=True)
y

  y = column_or_1d(y, warn=True)


array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [18]:
X_train, X_test, y_train, y_test = train_test_split(
           X, y, test_size = 0.3, random_state = 0,shuffle=False)
    
net = MLPClassifier(random_state=0, max_iter=1000, activation ='relu', learning_rate = 'adaptive', learning_rate_init = 0.00001, hidden_layer_sizes=1000)

In [19]:
net.fit(X_train, y_train)

In [20]:
y_pred1 = net.predict(X_train)
err = accuracy_score(y_train, y_pred1)
print(err)

0.9901365705614568


In [21]:
y_pred2 = net.predict(X_test)
err = accuracy_score(y_test, y_pred2)
print(err)

0.9787610619469026
