In [None]:
pip install openpyxl pandas numpy sklearn matplotlib seaborn

In [None]:
import pandas as pd
import numpy as np
import sklearn as sk
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.plotting import scatter_matrix
from sklearn.neighbors import LocalOutlierFactor
from sklearn import preprocessing

In [None]:
# Read the first workbook and cast its unnamed leading column to integers in one chain.
x_bp = pd.read_excel("./hw_data_composite/X_bp.xlsx").astype({"Unnamed: 0": "int"})
x_bp

In [None]:
# Read the second workbook; both the unnamed leading column and the angle column become integers.
nup_dtypes = {"Unnamed: 0": "int", "Угол нашивки, град": "int"}
x_nup = pd.read_excel("./hw_data_composite/X_nup.xlsx").astype(nup_dtypes)
x_nup

In [None]:
# Inner-join the two tables on the shared 'Unnamed: 0' column, then discard that column.
dataset = (
    x_bp
    .merge(x_nup, how="inner", on="Unnamed: 0")
    .drop(columns="Unnamed: 0")
)
dataset

In [None]:
# Per-column median of the merged table.
dataset.agg("median")

In [None]:
# Per-column arithmetic mean of the merged table.
dataset.agg("mean")

In [None]:
# Copy of the data without the angle column.
ds_without_angles = dataset.drop(columns=["Угол нашивки, град"])

In [None]:
# One 20-bin histogram per remaining column.
ds_without_angles.hist(bins=20, color="green", figsize=(20, 20))

In [None]:
# Draw a separate box plot for every column except the angle column.
for feature in ds_without_angles.columns:
    feature_values = dataset[feature]
    sns.boxplot(x=feature_values, color="green")
    plt.show()

In [None]:
# Pairwise scatter plots with nothing drawn on the diagonal.
pair_diagram = scatter_matrix(
    ds_without_angles,
    alpha=0.25,
    figsize=(16, 16),
    diagonal=None,
    color="green",
)
# Re-apply each axis label with a smaller, rotated font.
for ax in pair_diagram.flat:
    ax.set_xlabel(ax.get_xlabel(), fontsize=10, rotation=45)
    ax.set_ylabel(ax.get_ylabel(), fontsize=10, rotation=45)
pair_diagram

In [None]:
# Number of missing values in every column.
dataset.isna().sum()

In [None]:
# Fit the local outlier factor model on the non-angle columns.
lof = LocalOutlierFactor(n_neighbors=300)
y_pred = lof.fit_predict(ds_without_angles)
# Collect the row positions whose predicted label is -1 (the value this cell treats as an outlier).
outlier_indexes = [position for position, label in enumerate(y_pred) if label == -1]
outlier_count = len(outlier_indexes)
print('Кол-во выбросов: ' + str(outlier_count))

In [None]:
# Remove the flagged rows from both frames; labels that are already absent are skipped.
dataset = dataset.drop(index=outlier_indexes, errors="ignore")
ds_without_angles = ds_without_angles.drop(index=outlier_indexes, errors="ignore")
dataset

In [None]:
# Min-max scale every column and rebuild a frame that carries the original column names.
min_max_scaler = preprocessing.MinMaxScaler()
dataset_normalized = pd.DataFrame(
    min_max_scaler.fit_transform(dataset.values),
    columns=dataset.columns,
)
dataset_normalized

In [None]:
# Standard-scale every column and rebuild a frame that carries the original column names.
standart_scaler = preprocessing.StandardScaler()
dataset_standartized = pd.DataFrame(
    standart_scaler.fit_transform(dataset.values),
    columns=dataset.columns,
)
dataset_standartized

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import KFold

In [None]:
# Model inputs: the normalized table minus the matrix/filler ratio and the two target columns.
non_input_columns = [
    'Соотношение матрица-наполнитель',
    'Модуль упругости при растяжении, ГПа',
    'Прочность при растяжении, МПа',
]
input_data = dataset_normalized.drop(columns=non_input_columns)
input_data

In [None]:
# Model targets: the two tensile columns of the normalized table.
target_columns = ['Модуль упругости при растяжении, ГПа', 'Прочность при растяжении, МПа']
output_data = dataset_normalized.loc[:, target_columns]
output_data

In [None]:
# Grid-search the Ridge regularization strength with 10-fold cross-validation.
# A single seeded splitter is shared by all candidates so that every alpha is
# scored on exactly the same folds and the ranking is reproducible between runs.
ridge_cv = KFold(n_splits=10, shuffle=True, random_state=42)
ridge_results = []
for alpha in np.logspace(-10, 0, 11):
    ridge_result = cross_val_score(
        Ridge(alpha=alpha), input_data, output_data, cv=ridge_cv, n_jobs=-1)
    # Store the mean fold score next to the alpha that produced it.
    ridge_results.append([alpha, ridge_result.mean()])
# Best-scoring alpha first; later cells read row 0 of this frame.
ridge_df = pd.DataFrame(ridge_results, columns=['alpha', 'score']).sort_values(by='score', ascending=False)
ridge_df

In [None]:
# Grid-search the Lasso regularization strength with 10-fold cross-validation.
# A single seeded splitter is shared by all candidates so that every alpha is
# scored on exactly the same folds and the ranking is reproducible between runs.
lasso_cv = KFold(n_splits=10, shuffle=True, random_state=42)
lasso_results = []
for alpha in np.logspace(-10, 0, 11):
    lasso_result = cross_val_score(
        Lasso(alpha=alpha), input_data, output_data, cv=lasso_cv, n_jobs=-1)
    # Store the mean fold score next to the alpha that produced it.
    lasso_results.append([alpha, lasso_result.mean()])
# Best-scoring alpha first; later cells read row 0 of this frame.
lasso_df = pd.DataFrame(lasso_results, columns=['alpha', 'score']).sort_values(by='score', ascending=False)
lasso_df

In [None]:
# Grid-search ElasticNet (alpha x l1_ratio) on degree-2 polynomial features
# with 10-fold cross-validation.
# The polynomial expansion does not depend on the hyper-parameters, so it is
# computed once instead of once per grid point.
poly_input_data = PolynomialFeatures(2).fit_transform(input_data)
# One seeded splitter for the whole grid: every (alpha, ratio) pair is scored on
# the same folds and the ranking is reproducible between runs.
poly_elastic_cv = KFold(n_splits=10, shuffle=True, random_state=42)
poly_elastic_results = []
for alpha in np.logspace(-10, 0, 11):
    for ratio in np.arange(11) / 10:
        poly_elastic_result = cross_val_score(
            ElasticNet(alpha=alpha, l1_ratio=ratio),
            poly_input_data,
            output_data,
            cv=poly_elastic_cv, n_jobs=-1)
        # Store the mean fold score next to the parameters that produced it.
        poly_elastic_results.append([alpha, ratio, poly_elastic_result.mean()])
# Best-scoring combination first; later cells read row 0 of this frame.
pedf = pd.DataFrame(poly_elastic_results, columns=['alpha', 'ratio', 'score']).sort_values(by='score', ascending=False)
pedf

In [None]:
# Hold out 30% of the rows for evaluation. The split is seeded so the scores
# printed by the following cells are the same on every Restart & Run All.
input_train, input_test, out_train, out_test = train_test_split(
    input_data, output_data, shuffle=True, test_size=0.3, random_state=42)

In [None]:
# Baseline: ordinary least squares fitted on the training split and scored on the hold-out split.
linr = LinearRegression(n_jobs=-1)
linr.fit(input_train, out_train)
# Score the model that was just fitted (the original referenced an undefined name `lr`).
print(linr.score(input_test, out_test))

In [None]:
# Take the alpha from the first row of the ridge search table and evaluate it on the hold-out split.
best_ridge_alpha = ridge_df['alpha'].iloc[0]
ridr = Ridge(alpha=best_ridge_alpha).fit(input_train, out_train)
print(ridr.score(input_test, out_test))

In [None]:
# Take the alpha from the first row of the lasso search table and evaluate it on the hold-out split.
best_lasso_alpha = lasso_df['alpha'].iloc[0]
lasr = Lasso(alpha=best_lasso_alpha).fit(input_train, out_train)
print(lasr.score(input_test, out_test))

In [None]:
# Refit ElasticNet on degree-2 polynomial features with the parameters from the
# first row of the search table, then score it on the hold-out split.
# The feature transformer is fitted once on the training inputs and only
# *applied* to the test inputs, rather than being re-fitted on the test set.
poly_features = PolynomialFeatures(2)
per = ElasticNet(alpha=pedf['alpha'].iloc[0], l1_ratio=pedf['ratio'].iloc[0])
per.fit(poly_features.fit_transform(input_train), out_train)
print(per.score(poly_features.transform(input_test), out_test))

# Split the cleaned data by angle: rows whose angle equals 0 go to dataset_0,
# all remaining rows go to dataset_90.
# Boolean masks are used instead of looking rows up by range(len(dataset)):
# outlier rows were dropped earlier, so the index has gaps and a label lookup
# would raise KeyError on removed labels and skip labels >= len(dataset).
is_zero_angle = dataset['Угол нашивки, град'] == 0
dataset_0 = dataset[is_zero_angle].copy()
dataset_90 = dataset[~is_zero_angle].copy()

# Pairwise scatter plots for the dataset_0 subset, with nothing drawn on the diagonal.
pair_diagram = scatter_matrix(
    dataset_0,
    alpha=0.25,
    figsize=(16, 16),
    diagonal=None,
    color="green",
)
# Re-apply each axis label with a smaller, rotated font.
for ax in pair_diagram.flat:
    ax.set_xlabel(ax.get_xlabel(), fontsize=10, rotation=45)
    ax.set_ylabel(ax.get_ylabel(), fontsize=10, rotation=45)
pair_diagram

# Pairwise scatter plots for the dataset_90 subset, with nothing drawn on the diagonal.
pair_diagram = scatter_matrix(
    dataset_90,
    alpha=0.25,
    figsize=(16, 16),
    diagonal=None,
    color="green",
)
# Re-apply each axis label with a smaller, rotated font.
for ax in pair_diagram.flat:
    ax.set_xlabel(ax.get_xlabel(), fontsize=10, rotation=45)
    ax.set_ylabel(ax.get_ylabel(), fontsize=10, rotation=45)
pair_diagram

# Histograms of every column for each angle subset, drawn with identical styling.
hist_style = dict(bins=20, color="green", figsize=(20, 20))
dataset_90.hist(**hist_style)

dataset_0.hist(**hist_style)