### Univariate Linear Regression

In [None]:
# Scatter plot of the data points with the fitted regression line on top

import matplotlib.pyplot as plt
import seaborn as sns

# Draw on an explicit Axes rather than on pyplot's implicit current axes
fig, ax = plt.subplots()
sns.regplot(
    x='DATA',
    y='PREDICTION',
    data=df_weather,
    line_kws={"color": "red"},  # draw the regression line in red
    ax=ax,
)
fig.suptitle('Regression: Max temperatures / Sun hours')
plt.show()

In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Two inputs are needed:
# X : the correlated (explanatory) column, selected with double brackets so it
#     stays a 2-D DataFrame
X = df_weather[['CORRELATED_COL']]

# y : the column to predict, as a 1-D Series
y = df_weather['PREDICTED_COL']

# Training the model
model_LR = LinearRegression().fit(X, y)

# Coefficient, the "a" in equation "aX + b".
# Printed explicitly: a notebook cell only displays its LAST bare expression,
# so a bare `model_LR.coef_` in the middle of the cell shows nothing.
print("Coefficient (a):", model_LR.coef_)

# Intercept, the "b" in equation "aX + b"
print("Intercept (b):", model_LR.intercept_)

# Prediction of one value.
# The value is wrapped in a DataFrame carrying the training column name, so
# scikit-learn does not warn that X has no valid feature names.
single_value = pd.DataFrame({'CORRELATED_COL': [10]})
print("Prediction for 10:", model_LR.predict(single_value))

# Prediction of multiple values (reusing X, i.e. the training column)
# NOTE(review): df_predict is not created in this section; it must already
# exist and have as many rows as df_weather. Confirm against the rest of the notebook.
df_predict['PREDICTED_COL'] = model_LR.predict(X)

### Multivariate Linear Regression

In [None]:
import plotly.express as px

# Interactive 3D scatter: the two correlated columns on x / y,
# the predicted column on z
px.scatter_3d(
    df_weather,
    x='CORRELATED_COL_1',
    y='CORRELATED_COL_2',
    z='PREDICTED_COL',
)

In [None]:
from sklearn.linear_model import LinearRegression

# Define the inputs in this cell: without this, X and y would still be the
# single-column inputs of the univariate section, and predicting on two
# columns with a model fitted on one column fails.
# X : the two correlated (explanatory) columns
X = df_weather[['CORRELATED_COL_1', 'CORRELATED_COL_2']]
# y : the column to predict
y = df_weather['PREDICTED_COL']

# Fitting and predicting (on the same two columns used for fitting)
model_LR = LinearRegression().fit(X, y)
# NOTE(review): df_predict is not created in this section; it must already
# exist and have as many rows as df_weather. Confirm against the rest of the notebook.
df_predict['PREDICTED_COL'] = model_LR.predict(X)

# R2 score, computed on the data the model was fitted on
score = model_LR.score(X, y)
print("R2 score :", score)

### Global Linear Regression with split samples

In [None]:
from sklearn.model_selection import train_test_split

# Target column and the two explanatory columns
y = df_dataset['PREDICTED_COL']
X = df_dataset[['CORRELATED_COL_1', 'CORRELATED_COL_2']]

# 75 % of the rows go to the training set, the rest to the test set;
# random_state is fixed so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.75,
    random_state=42,
)

# Fit on the training subset only
model_LR = LinearRegression()
model_LR.fit(X_train, y_train)

# Comparing both scores:
r2_train = model_LR.score(X_train, y_train)
r2_test = model_LR.score(X_test, y_test)
print("\nScore for the Train dataset :", r2_train)
print("Score for the Test dataset :", r2_test)


### Logistic Regression Grid Search (full search)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

LR = LogisticRegression()

# Candidate values for the inverse regularization strength C
C_values = [0.001, 0.01, 0.1, 1, 10, 100, 1000]

# Not every solver supports every penalty: 'l1' is only handled by
# 'liblinear' and 'saga'. A single crossed grid would therefore contain
# combinations that always fail to fit (NaN scores and warnings), so the grid
# is split into two sub-grids holding only valid combinations.
LRparam_grid = [
    {
        'C': C_values,
        'penalty': ['l2'],
        # 'max_iter': list(range(1000,9000,2000)),
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
    },
    {
        'C': C_values,
        'penalty': ['l1'],
        'solver': ['liblinear', 'saga'],
    },
]

# refit=True: the best configuration is refitted on the whole data at the end
# verbose=3: progress messages are printed during the search
# cv=5: 5-fold cross-validation
LR_search = GridSearchCV(LR, param_grid=LRparam_grid, refit = True, verbose = 3, cv=5)

# fitting the model for grid search
# NOTE(review): X and y are not defined in this cell; y must hold class labels
# for a logistic regression. Confirm which X / y are in scope here.
LR_search.fit(X, y)

# summarize: best mean cross-validated score and the parameters that reached it
print('Mean Accuracy: %.3f' % LR_search.best_score_)
print('Config: %s' % LR_search.best_params_)

### Logistic Regression Grid Search (reduced parameters)
Manually refine C with a dichotomic (bisection-style) search if the solver does not reach convergence.

In [None]:
# Logistic Regression grid search restricted to a few values of C,
# with a large iteration budget (max_iter=10000)
LRparam_grid = {
    'C': [0.1, 1, 10],
    'max_iter': [10000],
}

# Build the search and fit it in one go (fit returns the fitted search object)
LR_search = GridSearchCV(
    estimator=LogisticRegression(),
    param_grid=LRparam_grid,
).fit(X, y)

print("best parameters:", LR_search.best_params_)