### Applying logistic regression

In [1]:
from datetime import datetime
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=DeprecationWarning)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

### Read training data

In [2]:
# Load the training features and the training labels from CSV files
# located in the current working directory.
X_train = pd.read_csv(filepath_or_buffer="X_train_df.csv")
y_train = pd.read_csv(filepath_or_buffer="y_train_df.csv")

In [3]:
# Preview the first three feature rows as a quick sanity check on the load.
X_train.head(n=3)

Unnamed: 0,neighbourhood,latitude,longitude,price,minimum_nights,number_of_reviews,availability_365
0,119,0.230733,0.416463,0.230769,0.0,0.05474,0.2
1,88,0.329646,0.557894,0.414716,0.001779,0.0,0.0
2,5,0.682529,0.355775,0.364548,0.00089,0.00534,0.909589


In [4]:
# Preview the first three label rows as a quick sanity check on the load.
y_train.head(n=3)

Unnamed: 0,room_type
0,2
1,0
2,2


In [16]:
# Base multinomial logistic-regression estimator. `C` is not set here because
# it is the hyper-parameter tuned by the grid search built in the next cells.
lg_reg = LogisticRegression(
    solver="newton-cg",
    multi_class="multinomial",
    max_iter=1000,
    random_state=0,  # fixed seed so repeated runs are comparable
)

In [17]:
# Candidate values of `C` (inverse regularisation strength) for the grid search.
parameter = dict(C=[0.10, 10, 100, 1000])

In [18]:
# Exhaustive 5-fold cross-validated search over the `parameter` grid,
# using `lg_reg` as the base estimator.
grid_search_cv = GridSearchCV(
    lg_reg,
    parameter,
    cv=5,
)

In [19]:
# Display the configured search object (it has not been fitted yet at this point).
grid_search_cv

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=1000, multi_class='multinomial',
          n_jobs=None, penalty='l2', random_state=0, solver='newton-cg',
          tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [0.1, 10, 100, 1000]}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score='warn', scoring=None, verbose=0)

In [20]:
# Exploratory check: list the attributes the search object exposes before fitting.
dir(grid_search_cv)

['__abstractmethods__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_check_is_fitted',
 '_estimator_type',
 '_format_results',
 '_get_param_names',
 '_run_search',
 'classes_',
 'cv',
 'decision_function',
 'error_score',
 'estimator',
 'fit',
 'fit_params',
 'get_params',
 'iid',
 'inverse_transform',
 'n_jobs',
 'param_grid',
 'pre_dispatch',
 'predict',
 'predict_log_proba',
 'predict_proba',
 'refit',
 'return_train_score',
 'score',
 'scoring',
 'set_params',
 'transform',
 'verbose']

In [21]:
# Run the grid search. The label frame is flattened to a 1-D array before
# being handed to the estimator.
grid_search_cv.fit(X=X_train, y=np.ravel(y_train.values))

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=1000, multi_class='multinomial',
          n_jobs=None, penalty='l2', random_state=0, solver='newton-cg',
          tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [0.1, 10, 100, 1000]}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score='warn', scoring=None, verbose=0)

In [22]:
# Best cross-validation score found by the search. No `scoring` was passed,
# so the estimator's own default scorer is what is being reported.
grid_search_cv.best_score_

0.8115334853058961

In [23]:
# Estimator configured with the best-scoring parameters found by the search.
grid_search_cv.best_estimator_

LogisticRegression(C=10, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=1000, multi_class='multinomial',
          n_jobs=None, penalty='l2', random_state=0, solver='newton-cg',
          tol=0.0001, verbose=0, warm_start=False)

In [24]:
# Parameter setting that achieved the best cross-validation score.
grid_search_cv.best_params_

{'C': 10}

In [25]:
# NOTE(review): the other imports live in the first cell of this notebook;
# consider moving this one there so all dependencies are visible up front.
import joblib

In [26]:
# Persist the whole fitted GridSearchCV object (not only the best estimator) to disk.
joblib.dump(value=grid_search_cv, filename='Logistic Regression.pkl')

['Logistic Regression.pkl']