In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the bookings table from the working directory; the trailing
# expression makes the cell display every column's dtype.
data = pd.read_csv(filepath_or_buffer='data.csv')
data.dtypes

hotel                              object
is_canceled                         int64
lead_time                           int64
arrival_date_year                   int64
arrival_date_month                 object
arrival_date_week_number            int64
arrival_date_day_of_month           int64
stays_in_weekend_nights             int64
stays_in_week_nights                int64
adults                              int64
children                            int64
babies                              int64
meal                               object
country                            object
market_segment                     object
distribution_channel               object
is_repeated_guest                   int64
previous_cancellations              int64
previous_bookings_not_canceled      int64
reserved_room_type                 object
assigned_room_type                 object
booking_changes                     int64
deposit_type                       object
agent                             

In [None]:
# Build the feature matrix / target, then split, encode, impute and scale.
X = data.drop('is_canceled', axis=1)
y = data['is_canceled']
# NOTE(review): the column listing shown in this notebook is truncated —
# confirm that X holds no fields recorded only after the outcome is known.

# Split first so that every transformer fitted below learns from training
# rows only; fitting on the full table would leak held-out information.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)

# Encode text columns with a feature encoder fitted on the training split.
# Missing entries are labelled explicitly and values are cast to str so the
# encoder never sees mixed str/float input. Categories that appear only in
# the test split are mapped to -1 instead of raising.
# NOTE(review): integer codes impose an arbitrary order on nominal features;
# consider one-hot encoding for the linear models below.
categorical_columns = X_train.select_dtypes(include='object').columns.tolist()
if categorical_columns:
    feature_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    train_codes = feature_encoder.fit_transform(
        X_train[categorical_columns].fillna('missing').astype(str)
    )
    test_codes = feature_encoder.transform(
        X_test[categorical_columns].fillna('missing').astype(str)
    )
    # Assign column by column so each column takes the numeric dtype of the codes.
    for position, column in enumerate(categorical_columns):
        X_train[column] = train_codes[:, position]
        X_test[column] = test_codes[:, position]

# Any remaining gaps or infinities (numeric columns) are replaced with 0.
X_train = X_train.replace([np.inf, -np.inf, np.nan], 0)
X_test = X_test.replace([np.inf, -np.inf, np.nan], 0)

# Standardise using statistics estimated on the training split only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit two logistic-regression classifiers on the scaled training features and
# compare them on the held-out split (accuracy + per-class report).
# NOTE(review): penalty='l2', C=1.0 and solver='lbfgs' appear to match the
# documented scikit-learn defaults, so both models are expected to score
# identically — confirm whether a different "custom" setup was intended.
logistic_model_1 = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
logistic_model_2 = LogisticRegression(
    penalty='l2', C=1.0, solver='lbfgs', max_iter=1000
).fit(X_train_scaled, y_train)

# Hard class predictions for the test rows, one array per model.
logistic_predictions_1, logistic_predictions_2 = (
    classifier.predict(X_test_scaled)
    for classifier in (logistic_model_1, logistic_model_2)
)

# Share of test rows classified correctly.
accuracy_1, accuracy_2 = (
    accuracy_score(y_test, predicted)
    for predicted in (logistic_predictions_1, logistic_predictions_2)
)

print(f"Точность (Логистическая регрессия - Начальные параметры) : {accuracy_1}")
print(f"Accuracy (Логистическая регрессия - Кастомные параметры) : {accuracy_2}")

# zero_division=1 makes undefined precision/recall values report as 1
# instead of emitting a warning.
report_1, report_2 = (
    classification_report(y_test, predicted, zero_division=1)
    for predicted in (logistic_predictions_1, logistic_predictions_2)
)

for heading, report in (
    ("Classification Report (Логистическая регрессия - Начальные параметры):", report_1),
    ("Classification Report (Логистическая регрессия - Кастомные параметры):", report_2),
):
    print(heading)
    print(report)

# Ordinary least-squares baseline trained on the same `is_canceled` target as
# the classifiers above; its outputs are continuous scores, so it is judged
# with regression metrics rather than accuracy.
linear_model = LinearRegression().fit(X_train_scaled, y_train)
linear_predictions = linear_model.predict(X_test_scaled)

# Mean squared error and coefficient of determination on the test split.
mse, r2 = (
    metric(y_test, linear_predictions)
    for metric in (mean_squared_error, r2_score)
)

print(f"Среднеквадратичная ошибка (Линейная регрессия) : {mse}")
print(f"R^2 Score (Линейная регрессия) : {r2}")