In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import klib as kl
import missingno as mns
import os
import warnings
import tqdm
import numba

# Process-level environment switches, set in one place.
os.environ.update({
    'KERAS_BACKEND': 'tensorflow',
    "CUDA_VISIBLE_DEVICES": "0",
    "KMP_DUPLICATE_LIB_OK": "TRUE",
})

# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Matplotlib defaults: use the 'Kaiti' font for the sans-serif family
# (presumably so the Chinese labels used below render - confirm the font is
# installed) and disable the unicode minus glyph.
plt.rcParams.update({
    'font.sans-serif': ['Kaiti'],
    'axes.unicode_minus': False,
})

# Locations, relative to the working directory, used throughout the notebook.
PIC_PATH = "../../models/image/image2/internet"  # saved figures
DATA_PATH = '../../data'
RESULT_PATH = '../../data/summary/'  # csv input / xlsx reports
MODEL_PATH = '../../models/model1'  # serialised models
import pathlib2 as pl2
import shutil


def creat_dir():
    """Prepare the output directories used by this notebook.

    * ``PIC_PATH`` is wiped (if present) and recreated empty, so every run
      starts with a clean figure directory.
    * ``RESULT_PATH`` and ``MODEL_PATH`` are created when missing; existing
      contents are left untouched.

    All directories are created together with any missing parents, and an
    already existing directory is not an error.
    """
    # Start from an empty figure directory.
    if os.path.exists(PIC_PATH):
        shutil.rmtree(PIC_PATH)
    os.makedirs(PIC_PATH, exist_ok=True)

    # os.makedirs (unlike os.mkdir) also creates missing parent directories
    # and, with exist_ok=True, does not race with a separate existence check.
    for directory in (RESULT_PATH, MODEL_PATH):
        os.makedirs(directory, exist_ok=True)


# Running number used in saved figure file names; create_figure() increments
# it before every save.
figure_count = 0

# Set up the output directories (this removes any existing PIC_PATH content).
creat_dir()


def create_figure(figure_name, dpi=800):
    """Save the current matplotlib figure as a numbered PNG under ``PIC_PATH``.

    The module-level ``figure_count`` is incremented first, and the file is
    written as ``figure<count>_<figure_name>.png``.

    Args:
        figure_name: Suffix placed after the running number in the file name.
        dpi: Resolution forwarded to ``plt.savefig``.
    """
    global figure_count
    figure_count = figure_count + 1
    output_file = PIC_PATH + f'/figure{figure_count}_{figure_name}.png'
    plt.savefig(output_file, dpi=dpi)


from joblib import dump, load


def save_model(model, model_name: str) -> None:
    """Serialise ``model`` with joblib to ``MODEL_PATH + model_name``.

    The two parts are joined by plain string concatenation, so ``model_name``
    must carry its own leading separator (e.g. ``'/my.model'``).

    Args:
        model: Object to persist.
        model_name: File name, including the leading ``/``.
    """
    destination = MODEL_PATH + model_name
    dump(model, destination)


def load_model(model_name: str):
    """Load and return the joblib file stored at ``MODEL_PATH + model_name``.

    The two parts are joined by plain string concatenation, so ``model_name``
    must carry its own leading separator (e.g. ``'/my.model'``).

    NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    code - only use it on model files from a trusted source.

    Args:
        model_name: File name, including the leading ``/``.

    Returns:
        Whatever object was stored in the file.
    """
    source = MODEL_PATH + model_name
    return load(source)

In [None]:
# Read the table from RESULT_PATH; the first CSV column becomes the index.
internet_csv = RESULT_PATH + 'internet.csv'
data = pd.read_csv(internet_csv, index_col=0)
data

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Restore the previously saved model; the leading '/' is required because
# load_model() concatenates MODEL_PATH and the name verbatim.
model_file = '/best_model_internet.model'
model: RandomForestClassifier = load_model(model_file)

In [None]:
# One row per feature column (data columns from position 4 onward) holding
# the model's feature importance, ordered from most to least important.
impact = (
    pd.DataFrame(
        data=model.feature_importances_,
        index=data.columns[4:],
        columns=['impact'],
    )
    .sort_values(by='impact', ascending=False)
)
impact

In [None]:
# Bar chart of the first ten rows of the (already sorted) importance table.
top_features = impact.iloc[:10]
plt.figure(figsize=(20, 9))
plt.bar(top_features.index, top_features['impact'])
create_figure('importance')

In [None]:
import toad

# Split the table by position: the first four columns are the targets, the
# remaining columns are the features.
train = data.iloc[:, 4:]
target = data.iloc[:, :4]

# Feature-quality report against the first target column, written to Excel.
target_name = target.columns[0]
quality_input = pd.concat([train, target.iloc[:, 0]], axis=1)
quality_report = toad.quality(quality_input, target=target_name)
quality_report.to_excel(RESULT_PATH + 'internet_' + target_name + '.xlsx', index=True)

In [None]:
# Feature-quality report against the second target column, written to Excel.
target_name = target.columns[1]
quality_input = pd.concat([train, target.iloc[:, 1]], axis=1)
quality_report = toad.quality(quality_input, target=target_name)
quality_report.to_excel(RESULT_PATH + 'internet_' + target_name + '.xlsx', index=True)

In [None]:
# Feature-quality report against the third target column, written to Excel.
target_name = target.columns[2]
quality_input = pd.concat([train, target.iloc[:, 2]], axis=1)
quality_report = toad.quality(quality_input, target=target_name)
quality_report.to_excel(RESULT_PATH + 'internet_' + target_name + '.xlsx', index=True)

In [None]:
# Feature-quality report against the fourth target column, written to Excel.
target_name = target.columns[3]
quality_input = pd.concat([train, target.iloc[:, 3]], axis=1)
quality_report = toad.quality(quality_input, target=target_name)
quality_report.to_excel(RESULT_PATH + 'internet_' + target_name + '.xlsx', index=True)

In [None]:
from shap import TreeExplainer
import shap

# Compute SHAP values for the feature table once; later cells reuse
# `shap_value`.
explain = TreeExplainer(model)
shap_value = explain.shap_values(train)

# Summary plot for entry 0, saved under the name of the first target column.
# NOTE(review): assumes shap_value[i] belongs to target.columns[i] - confirm.
figure_label = 'internet_shap_' + target.columns[0]
shap.summary_plot(shap_value[0], train, show=False)
create_figure(figure_label)

In [None]:
# Summary plot for entry 1, saved under the name of the second target column.
# NOTE(review): assumes shap_value[i] belongs to target.columns[i] - confirm.
figure_label = 'internet_shap_' + target.columns[1]
shap.summary_plot(shap_value[1], train, show=False)
create_figure(figure_label)

In [None]:
# Summary plot for entry 3, saved under the name of the fourth target column.
# NOTE(review): assumes shap_value[i] belongs to target.columns[i] - confirm.
figure_label = 'internet_shap_' + target.columns[3]
shap.summary_plot(shap_value[3], train, show=False)
create_figure(figure_label)

In [None]:
# Summary plot for entry 2, saved under the name of the third target column.
# NOTE(review): assumes shap_value[i] belongs to target.columns[i] - confirm.
figure_label = 'internet_shap_' + target.columns[2]
shap.summary_plot(shap_value[2], train, show=False)
create_figure(figure_label)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix

# Draw a 2x2 grid with one column-standardised confusion-matrix heatmap per
# target, then save the whole figure.
#
# Two defects of the previous version are fixed here:
#   * panels 2-4 drew the heatmap *before* recomputing `matrix`, so each panel
#     showed the previous target's matrix under the wrong title and the last
#     target was never drawn;
#   * every panel compared against prediction column 0 instead of the column
#     belonging to its own target.
panel_targets = ['手机上网整体满意度', '手机上网稳定性', '手机上网速度', '网络覆盖与信号强度']
score_labels = list(range(1, 11))

# Predict once and reuse the result for all panels.
predictions = model.predict(train)

plt.figure(figsize=(16, 16))
for panel_no, target_name in enumerate(panel_targets, start=1):
    plt.subplot(2, 2, panel_no)
    # NOTE(review): assumes prediction column i corresponds to
    # target.columns[i] (the same pairing the SHAP cells use) - confirm.
    output_idx = target.columns.get_loc(target_name)
    matrix = confusion_matrix(target[target_name], predictions[:, output_idx])
    # The fixed 1..10 labels require all ten classes to occur in the data;
    # otherwise the DataFrame constructor raises on the shape mismatch.
    matrix = pd.DataFrame(matrix, index=score_labels, columns=score_labels)
    sns.heatmap(StandardScaler().fit_transform(matrix))
    plt.title(target_name)
    # NOTE(review): limits kept exactly as before; they are expressed in axis
    # coordinates, not in score labels, so they may crop the heatmap - confirm
    # this is intended.
    plt.xlim((1, 10))
    plt.ylim((1, 10))
create_figure('confusion_matrix')

In [None]:
from autoviz.AutoViz_Class import AutoViz_Class

# Order the rows by the dependent variable, then let AutoViz generate PNG
# charts into the figure directory.
dep_var = '手机上网整体满意度'
data = data.sort_values(by=dep_var, ascending=True)
av = AutoViz_Class()
av.AutoViz(
    filename=None,
    depVar=dep_var,
    dfte=data,
    verbose=1,
    chart_format='png',
    save_plot_dir=PIC_PATH + '/',
)