In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import itertools as it
from MetricMethod import *
from phik import phik_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

In [None]:
# Load the two CSV splits used throughout the notebook.
# NOTE(review): "test_phote.csv" looks like a typo for "test_phone.csv" —
# confirm the actual file name on disk before renaming anything.
train = pd.read_csv("train_phone.csv")
test = pd.read_csv("test_phote.csv")

# Stack both splits for exploratory analysis. ignore_index=True gives every
# row a unique label; without it the labels of `train` and `test` repeat,
# which makes rows printed later (e.g. detected outliers) ambiguous.
df = pd.concat([train, test], ignore_index=True)
df = df.drop(columns='id')
df.info()

In [None]:
# Preview the first rows of the combined train + test frame.
df.head()

In [None]:
# Number of columns in the combined frame (second entry of the shape tuple).
df.shape[1]

In [None]:
# Histograms for the first nine columns (by position) of the combined frame.
t = df.iloc[:, :9]
t.hist(xrot=15, figsize=(8, 12))

In [None]:
# Histograms for every column from position 9 onward.
t = df.iloc[:, 9:]
t.hist(xrot=15, figsize=(8, 12))

In [None]:
def draw_box_plot(df: pd.DataFrame, ncols: int = 2, figsize: tuple = (8, 12)) -> None:
    """Draw one box plot per column of ``df`` on a grid of subplots.

    Args:
        df: Frame whose columns are plotted; every column must be
            convertible to ``float``.
        ncols: Number of subplot columns in the grid (default 2).
        figsize: Figure size passed to ``plt.subplots`` (default ``(8, 12)``).

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Raises:
        ValueError: If ``ncols`` is smaller than 1.
    """
    if ncols < 1:
        raise ValueError("ncols must be at least 1")

    num_columns = len(df.columns)
    if num_columns == 0:
        # A grid with zero rows cannot be created, so there is nothing to draw.
        return

    # Ceiling division: just enough rows to give every column its own axis.
    num_rows = -(-num_columns // ncols)

    # squeeze=False always returns a 2-D array of axes, so flatten() works
    # for any grid shape, including a single row or a single cell.
    fig, axes = plt.subplots(num_rows, ncols, figsize=figsize, squeeze=False)
    axes = axes.flatten()

    for ax, column in zip(axes, df.columns):
        # Missing values are dropped first: boxplot does not skip NaN and
        # would otherwise render an empty box for such a column.
        ax.boxplot(df[column].astype(float).dropna())
        ax.grid(True)
        ax.set_title(column)

    # Remove every grid cell that did not receive a column.
    for ax in axes[num_columns:]:
        fig.delaxes(ax)

    fig.tight_layout()
    plt.show()

In [None]:
# Leave the listed columns out of the box plots, then draw the first eight
# of the remaining columns.
t = df.drop(['three_g', 'wifi', 'touch_screen', 'price_range', 'blue'], axis=1)
draw_box_plot(t.iloc[:, :8])

In [None]:
# Box plots for the rest of `t`: every column from position 8 onward.
draw_box_plot(t.iloc[:, 8:])

In [None]:
# Columns that are skipped by the 3-sigma outlier check below.
excluded_columns = {'three_g', 'wifi', 'touch_screen', 'price_range', 'blue', 'id'}

# For every remaining column, report the rows lying more than three standard
# deviations away from the column mean, together with their price_range.
for column in df.columns:
    if column in excluded_columns:
        continue

    values = df[column]
    mean = values.mean()
    std = values.std()

    lower_limit = mean - 3 * std
    upper_limit = mean + 3 * std

    ind = (values < lower_limit) | (values > upper_limit)
    out = df.loc[ind, [column, 'price_range']]
    if len(out) > 0:
        print(f'Нижняя граница {lower_limit}')
        print(f'Верхняя граница {upper_limit}')
        print(f'Количество {len(out)}')
        print(out)

In [None]:
# Correlation matrix of the training data computed by phik_matrix
# (imported from the phik package).
phik_matrix(train)

In [None]:
# Pairwise correlation of the training columns using pandas' default method.
train.corr()

In [None]:
def print_score(pred_y, test_y):
    """Print accuracy, F1, precision and recall, each rounded to 4 decimals.

    Args:
        pred_y: Predicted labels.
        test_y: Reference labels; passed to each metric as the first argument.

    F1, precision and recall are computed with ``average='weighted'``;
    accuracy takes no averaging argument.
    """
    weighted = {'average': 'weighted'}
    report = (
        ('Accuracy', accuracy_score, {}),
        ('F1', f1_score, weighted),
        ('Precision', precision_score, weighted),
        ('Recall', recall_score, weighted),
    )
    # Metrics are evaluated and printed one by one, in the order listed above.
    for label, metric, options in report:
        print(f'{label} {round(metric(test_y, pred_y, **options), 4)}')
    

In [None]:
# Separate the target column from the features, then hold out 20% of the
# training data for evaluation; the fixed seed keeps the split reproducible.
train_features = train.drop(columns='price_range')
train_target = train['price_range']
X_train, X_test, y_train, y_test = train_test_split(
    train_features, train_target, test_size=0.2, random_state=42
)

In [None]:
# Preview the training features produced by the split.
X_train.head()

In [None]:
from sklearn import preprocessing

# Learn the per-feature minimum and maximum from the training split and
# rescale the training features with them.
scaler = preprocessing.MinMaxScaler()

names = X_train.columns
d = scaler.fit(X_train).transform(X_train)

# Wrap the scaled array back into a DataFrame with the original column names.
# NOTE(review): this gives X_train a fresh 0..n-1 index while y_train keeps
# the labels from the split — confirm nothing downstream aligns them by index.
X_train = pd.DataFrame(data=d, columns=names)
print(len(X_train))
X_train.head()

In [None]:
names = X_test.columns
# Apply the scaling already fitted on X_train. Calling fit_transform here
# would refit the scaler on the test split, so test features would be scaled
# differently from the training features and the fitted state would be lost.
# Scaled test values may fall slightly outside [0, 1]; that is expected.
# NOTE: run this cell once per split — re-running it would scale X_test twice.
d = scaler.transform(X_test)

X_test = pd.DataFrame(d, columns=names)
print(len(X_test))
X_test.head()

In [None]:
# Baseline: scikit-learn's k-nearest-neighbours classifier with 200
# neighbours and distance-based vote weights.
model = KNeighborsClassifier(weights='distance', n_neighbors=200).fit(X_train, y_train)
y_pred = model.predict(X_test)
print_score(pred_y=y_pred, test_y=y_test)

In [None]:
# KNN classifier (presumably provided by the star import from MetricMethod)
# with 200 neighbours and the 'kdtree' method, scored on the held-out split.
# NOTE(review): the keyword is spelled `countNeigbor` here but `countNeighbor`
# in the ParzenWindowVariableWidth cell — confirm both match the signatures.
model = KNN(countNeigbor=200, method='kdtree')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print_score(y_pred, y_test)

In [None]:
# KNN_weight classifier (presumably provided by the star import from
# MetricMethod) with 200 neighbours and the 'kdtree' method, scored on the
# held-out split.
model = KNN_weight(countNeigbor=200, method='kdtree')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print_score(y_pred, y_test)

In [None]:
# ParzenWindowFixedWidth classifier (presumably provided by the star import
# from MetricMethod) with width 2.1 and the 'kdtree' method, scored on the
# held-out split.
model = ParzenWindowFixedWidth(width=2.1, method='kdtree')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print_score(y_pred, y_test)

In [None]:
# ParzenWindowVariableWidth classifier (presumably provided by the star import
# from MetricMethod) with 200 neighbours and the 'kdtree' method, scored on
# the held-out split.
model = ParzenWindowVariableWidth(countNeighbor=200, method='kdtree')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print_score(y_pred, y_test)

In [None]:
# PotentialFunction classifier (presumably provided by the star import from
# MetricMethod) with width 2.1, eps 0.6 and the 'kdtree' method, scored on
# the held-out split.
model = PotentialFunction(width=2.1, eps=0.6, method='kdtree')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print_score(y_pred, y_test)