In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Read the CSV, discard the stray 'Unnamed: 0' column and append a running
# integer 'Date' column (0 .. n-1). Routing 'date_time' through the index and
# back leaves it as the first column, with 'Date' as the last one.
df = (
    pd.read_csv('ortalama_alinmis.csv')
    .drop(columns=['Unnamed: 0'])
    .set_index('date_time')
)
df['Date'] = np.arange(len(df))
df = df.reset_index()

In [5]:
# Reorder the columns: the last one moves to the front and the first one is
# dropped. Given how the frame is built when it is loaded in this notebook,
# that puts 'Date' first and removes 'date_time'.
cols = df.columns.to_list()
cols = cols[-1:] + cols[1:-1]
df = df[cols]

# Features are every column but the last; the target is the last column.
# Take an explicit copy so the in-place scaling applied to X afterwards works
# on its own data instead of a slice of df (avoids SettingWithCopyWarning and
# any ambiguity about whether df is modified as well).
X = df.iloc[:, :-1].copy()
y = df.iloc[:, -1]

In [6]:
# Normalise 'H_data' and 'AH_data' by their own column maxima. Series.max() is
# vectorised and skips NaN, unlike the builtin max() iterating over a Series.
# NOTE(review): the maxima are taken over the full data set, i.e. before the
# train/test split — confirm this is intended.
X['H_data'] = X['H_data'] / X['H_data'].max()
X['AH_data'] = X['AH_data'] / X['AH_data'].max()

# Divide the columns from position 1 up to (not including) the last two by 2500.
# NOTE(review): 2500 is undocumented — presumably a full-scale value; confirm.
# NOTE(review): this step is not idempotent — re-running the cell divides again.
X.iloc[:, 1:-2] = X.iloc[:, 1:-2] / 2500

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=53,
)

In [8]:
import pickle 
# Load a previously pickled model from 'model.pkl'. In this notebook that file
# is only written further down (pickle.dump(random, f)), so it has to exist
# already from an earlier run when this cell executes.
# NOTE(security): pickle.load can execute arbitrary code — only load trusted files.
with open ('model.pkl','rb') as f:
    model= pickle.load(f)

In [9]:
# Predictions of the loaded model for the held-out features (echoed as the cell output).
model.predict(
    X_test
)

array([0.7924972 , 0.8877193 , 0.72961553, ..., 0.79742441, 0.72399403,
       0.8901829 ])

In [11]:
# score() of the loaded model on the held-out split
# (R^2 when the model is a scikit-learn regressor).
score = model.score(X_test, y_test)
score

0.9389148752515426

# DecisionTreeRegressor


In [6]:
from sklearn.tree import DecisionTreeRegressor

# Baseline: a decision tree with scikit-learn's default hyper-parameters.
model_tree = DecisionTreeRegressor()

In [7]:
# Fit the baseline tree on the training split.
model_tree.fit(X_train, y_train)

In [8]:
# Test-split predictions of the baseline tree, kept for the metric comparison later on.
basic_pred = model_tree.predict(X_test)

In [9]:
# Baseline tree: R^2 on the training split (compare with the test-split score
# to gauge over-fitting).
model_tree.score(X_train, y_train)

1.0

In [10]:
# Baseline tree: R^2 on the held-out test split.
model_tree.score(X_test, y_test)

0.8492403918153925

# GridSearchCV

In [11]:
from sklearn.model_selection import GridSearchCV

In [12]:
# Hyper-parameter grid for the decision tree. Every combination is evaluated:
# 1 * 10 * 10 * 12 * 9 = 10,800 candidates.
grid_params = {
    "splitter": ["best"],
    "max_depth": [5, 6, 7, 8, 9, 11, 12, 15, 25, 50],
    "min_samples_leaf": [3, 4, 5, 10, 15, 20, 25, 30, 40, 50],
    "min_weight_fraction_leaf": [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.01,
                                 0.05, 0.1, 0.2, 0.3, 0.4, 0.5],
    "max_leaf_nodes": [20, 30, 50, 70, 100, 150, 200, 300, 500],
}

# With cv=10 that is 108,000 fits, so spread them over all available cores —
# the same n_jobs=-1 setting the random-forest search in this notebook uses.
grid_model = GridSearchCV(model_tree, grid_params, cv=10, n_jobs=-1)

In [None]:
# Run the exhaustive search on the training split (long-running cell).
grid_model.fit(X_train, y_train)

In [None]:
# Best hyper-parameter combination found by the decision-tree grid search.
grid_model.best_params_

{'max_depth': 25,
 'max_leaf_nodes': 500,
 'min_samples_leaf': 25,
 'min_weight_fraction_leaf': 0.001,
 'splitter': 'best'}

In [14]:
# Rebuild the tree with the best parameters reported by the grid search,
# hard-coded here (presumably so the search need not be re-run).
# NOTE: this rebinds `model_tree`, replacing the untuned baseline tree.
tuned_tree_params = {
    'max_depth': 25,
    'max_leaf_nodes': 500,
    'min_samples_leaf': 25,
    'min_weight_fraction_leaf': 0.001,
    'splitter': 'best',
}
model_tree = DecisionTreeRegressor(**tuned_tree_params)

In [16]:
# Fit the tuned tree on the training split.
model_tree.fit(X_train, y_train)

In [17]:
# Test-split predictions of the tuned tree, kept for the metric comparison later on.
pred_tree = model_tree.predict(X_test)

# RandomForestRegressor

In [6]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

# Search over the number of trees only, with 5-fold CV run on all available cores.
params_grid = {'n_estimators': [170, 180, 190, 200, 210]}
random_model = RandomForestRegressor()
grid_random = GridSearchCV(
    random_model,
    params_grid,
    cv=5,
    n_jobs=-1,
)

In [10]:
# Run the forest grid search on the training split.
grid_random.fit(X_train, y_train)

In [20]:
# Tree count selected by the random-forest grid search.
grid_random.best_params_

{'n_estimators': 190}

In [7]:
# Final forest, built with the tree count the grid search selected.
# NOTE(review): the name `random` shadows the standard-library module of the
# same name for the rest of the notebook, and no random_state is set, so the
# fitted forest (and its scores) can differ slightly between runs.
random = RandomForestRegressor(n_estimators=190)
random.fit(X_train, y_train)

In [8]:
# Test-split predictions of the final forest. Despite the variable name, they
# come from `random`, not from the `grid_random` search object.
grid_random_pred = random.predict(X_test)

# Performance Metrics


In [9]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,mean_absolute_percentage_error


def get_score(pred, y_true=None):
    """Print MSE, MAE and MAPE of ``pred`` against the reference targets.

    Parameters
    ----------
    pred : array-like
        Predicted target values.
    y_true : array-like, optional
        Ground-truth targets to compare against. When omitted, the
        notebook-level ``y_test`` is used, which is what every existing
        ``get_score(pred)`` call relies on.

    Returns
    -------
    None
        The three metrics are printed, one per line; nothing is returned.
    """
    if y_true is None:
        # Default to the global test targets so one-argument calls keep working.
        y_true = y_test

    mse = mean_squared_error(y_true, pred)
    mae = mean_absolute_error(y_true, pred)
    mape = mean_absolute_percentage_error(y_true, pred)

    print(f'mean_squared_error={mse}\nmean_absolute_error={mae}\nmean_absolute_percentage_error={mape}')

In [24]:
# Tuned tree: R^2 on the training split
model_tree.score(X_train, y_train)

0.9225132048022083

In [10]:
# Random forest (built with n_estimators=190): R^2 on the training split
random.score(X_train, y_train)

0.9920907109181025

In [29]:
# Test-split score of the forest grid search (uses its refitted best estimator).
grid_random.score(X_test, y_test)

0.9389633227570842

In [30]:
# Baseline (untuned) decision tree: error metrics on the test split
get_score(basic_pred)

mean_squared_error=0.0006403769399144645
mean_absolute_error=0.018592802827807637
mean_absolute_percentage_error=0.022268658666025915


In [31]:
# Tuned decision tree: error metrics on the test split
get_score(pred_tree)

mean_squared_error=0.0005106554795197864
mean_absolute_error=0.016981328896011935
mean_absolute_percentage_error=0.020337698271270695


In [32]:
# Random forest (`random`, n_estimators=190): error metrics on the test split
get_score(grid_random_pred)

mean_squared_error=0.0002592636122236621
mean_absolute_error=0.011481944828830434
mean_absolute_percentage_error=0.013787784343789567


In [11]:
# Persist the fitted random forest (`random`) to 'model.pkl'; the cell near the
# top of the notebook that unpickles 'model.pkl' reads this file back.
import pickle
with open('model.pkl','wb') as f :
    pickle.dump(random,f)

In [12]:
# Redundant: the `with` block that opened `f` has already closed it, and
# calling close() on an already-closed file is a no-op.
f.close()

# Cross Validation

In [None]:
# 5-fold cross-validation of the random-forest grid search on the training split.
# NOTE(review): `grid_random` is a GridSearchCV object, so this is nested CV —
# the whole n_estimators search is repeated inside each outer fold, which is
# slow. If the intent was to cross-validate the final forest, pass `random`.
from sklearn.model_selection import cross_val_score
scores = cross_val_score(grid_random, X_train, y_train, cv=5)
scores.mean()