# Необходимые импорты

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense

from sklearn.metrics import classification_report, mean_squared_error

# Datasets

In [None]:
# Load the two semicolon-separated source tables from the working directory.
frac_df = pd.read_csv('./frac.csv', sep=";")

# NOTE(review): result_slice_df is not referenced again in the visible part of
# the notebook; the classification section re-reads the same file with the
# default separator and encoding -- confirm which settings are correct.
result_slice_df = pd.read_csv('./result_slice.csv', sep=";", encoding="windows-1251")

In [None]:
# First look at the raw table: contents, dimensions, dtypes, summary statistics.
display(frac_df)
display(frac_df.shape)
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in display() only added a stray "None" to the output.
frac_df.info()
display(frac_df.describe())

# Регрессия

## Подготовка датасета

In [None]:
def toNum(s):
  """Convert a decimal-comma string such as ``'1,5'`` to a float.

  Args:
    s: Any cell value. Strings have ``','`` replaced by ``'.'`` before the
      conversion is attempted.

  Returns:
    The parsed float, or ``s`` unchanged when it is not a string (no usable
    ``replace`` method) or its text is not a valid number.
  """
  try:
    return float(s.replace(',', '.'))
  except (AttributeError, TypeError, ValueError):
    # Only conversion failures are tolerated. A bare ``except`` would also
    # swallow KeyboardInterrupt/SystemExit raised while a long ``apply`` runs.
    return s

In [None]:
# Run toNum over every cell, one column at a time: decimal-comma strings
# become floats, anything toNum cannot parse is kept as it was.
for column_name in frac_df.columns:
    converted = frac_df[column_name].apply(toNum)
    frac_df[column_name] = converted

In [None]:
# Threshold on the share of missing values used to flag a column.
k_miss = .5

# Share of missing values per column, in column order.
miss_share = frac_df.isnull().sum() / frac_df.shape[0]

# Names of the columns whose missing share exceeds the threshold.
half_cols = [
    name
    for name, too_sparse in zip(frac_df.columns.tolist(), miss_share > k_miss)
    if too_sparse
]

half_cols

In [None]:
# Remove the columns flagged in half_cols (too many missing values).
frac_df = frac_df.drop(columns=half_cols)

In [None]:
# Columns whose dtype is still ``object`` after the numeric conversion.
is_object_dtype = (frac_df.dtypes == np.object_).to_numpy()
obj_cols = frac_df.columns[is_object_dtype].tolist()

obj_cols

In [None]:
# Remove the object-dtype columns listed in obj_cols.
frac_df = frac_df.drop(columns=obj_cols)

In [None]:
# Fill each gap with the last value seen above it in the same column.
# DataFrame.ffill replaces fillna(method='ffill'): the ``method`` argument of
# fillna is deprecated in pandas 2.1+.
frac_df = frac_df.ffill(axis=0)
# Rows that still contain NaN (nothing above them to copy from) are dropped.
frac_df = frac_df.dropna(axis=0)
frac_df

## Опыты

In [None]:
# Target is result_qoil3; it and 'year' are both left out of the features.
y = frac_df['result_qoil3']
X = frac_df.drop(columns=['result_qoil3', 'year'])

# Two consecutive 80/20 splits with the same seed: the test set is held out
# first, then the validation set is carved out of the remaining training rows.
split_kwargs = dict(test_size=0.2, random_state=15)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, **split_kwargs)

print(f'X_train: {X_train.shape}', f'X_val: {X_val.shape}', sep='\n')

In [None]:
# Test-set MSE of each experiment; every experiment's evaluation cell appends
# one value, so the order follows the order in which those cells are run.
mse_results = []

### Опыт №1

In [None]:
# Experiment 1: hidden ReLU layers of 120 and 60 units, one linear output.
# Layer sizes, batch size and epoch count are meant to be tuned.
regres_model = Sequential()

# Only the first layer declares the input width; later layers take their
# input from the previous layer, so repeating input_dim there was redundant.
regres_model.add(Dense(120, activation='relu', input_dim=X_train.shape[1]))
regres_model.add(Dense(60, activation='relu'))
regres_model.add(Dense(1))

# metrics is passed as a list, the form documented for Model.compile;
# Keras 3 does not accept a bare string here.
regres_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

batch_size = 10
epochs = 105

regres_model_hist = regres_model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val)
)

In [None]:
# Score the fitted network on the held-out test split and record the MSE.
y_pred = regres_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse_results.append(mse)

print(f"MSE: {mse:f}")

In [None]:
# Actual vs predicted target values for the test split, indexed like y_test.
# np.ravel makes the predictions 1-D so they can be used as a single column.
result_df = pd.DataFrame({'Test': y_test, 'Pred': np.ravel(y_pred)})

result_df

In [None]:
# Learning curves: training and validation loss per recorded epoch.
loss_function = regres_model_hist.history['loss']
val_loss_function = regres_model_hist.history['val_loss']

# The x-axis is taken from the recorded history rather than the global
# ``epochs``: the two disagree when this cell is run after another experiment
# has changed ``epochs``, and plt.plot then fails on mismatched lengths.
epoch = range(1, len(loss_function) + 1)

plt.figure(figsize=(20, 5))
plt.title('Потери на обучающей и валидационной выборках')
plt.plot(epoch, loss_function, color='blue', label='Ошибка обучения (MSE)')
plt.plot(epoch, val_loss_function, color='red', label='Ошибка валидации (MSE)')
plt.xlabel('Эпоха')
plt.ylabel('Ошибка модели (MSE)')
plt.legend()
plt.show()

### Опыт №2

In [None]:
# Experiment 2: same 120 -> 60 network as experiment 1, trained for 200 epochs.
# Layer sizes, batch size and epoch count are meant to be tuned.
regres_model = Sequential()

# Only the first layer declares the input width; later layers take their
# input from the previous layer, so repeating input_dim there was redundant.
regres_model.add(Dense(120, activation='relu', input_dim=X_train.shape[1]))
regres_model.add(Dense(60, activation='relu'))
regres_model.add(Dense(1))

# metrics is passed as a list, the form documented for Model.compile;
# Keras 3 does not accept a bare string here.
regres_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

batch_size = 10
epochs = 200

regres_model_hist = regres_model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val)
)

In [None]:
# Score the fitted network on the held-out test split and record the MSE.
y_pred = regres_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse_results.append(mse)

print(f"MSE: {mse:f}")

In [None]:
# Actual vs predicted target values for the test split, indexed like y_test.
# np.ravel makes the predictions 1-D so they can be used as a single column.
result_df = pd.DataFrame({'Test': y_test, 'Pred': np.ravel(y_pred)})

result_df

In [None]:
# Learning curves: training and validation loss per recorded epoch.
loss_function = regres_model_hist.history['loss']
val_loss_function = regres_model_hist.history['val_loss']

# The x-axis is taken from the recorded history rather than the global
# ``epochs``: the two disagree when this cell is run after another experiment
# has changed ``epochs``, and plt.plot then fails on mismatched lengths.
epoch = range(1, len(loss_function) + 1)

plt.figure(figsize=(20, 5))
plt.title('Потери на обучающей и валидационной выборках')
plt.plot(epoch, loss_function, color='blue', label='Ошибка обучения (MSE)')
plt.plot(epoch, val_loss_function, color='red', label='Ошибка валидации (MSE)')
plt.xlabel('Эпоха')
plt.ylabel('Ошибка модели (MSE)')
plt.legend()
plt.show()

### Опыт №3

In [None]:
# Experiment 3: smaller network, hidden ReLU layers of 60 and 30 units.
# Layer sizes, batch size and epoch count are meant to be tuned.
regres_model = Sequential()

# Only the first layer declares the input width; later layers take their
# input from the previous layer, so repeating input_dim there was redundant.
regres_model.add(Dense(60, activation='relu', input_dim=X_train.shape[1]))
regres_model.add(Dense(30, activation='relu'))
regres_model.add(Dense(1))

# metrics is passed as a list, the form documented for Model.compile;
# Keras 3 does not accept a bare string here.
regres_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

batch_size = 10
epochs = 200

regres_model_hist = regres_model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val)
)

In [None]:
# Score the fitted network on the held-out test split and record the MSE.
y_pred = regres_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse_results.append(mse)

print(f"MSE: {mse:f}")

In [None]:
# Actual vs predicted target values for the test split, indexed like y_test.
# np.ravel makes the predictions 1-D so they can be used as a single column.
result_df = pd.DataFrame({'Test': y_test, 'Pred': np.ravel(y_pred)})

result_df

In [None]:
# Learning curves: training and validation loss per recorded epoch.
loss_function = regres_model_hist.history['loss']
val_loss_function = regres_model_hist.history['val_loss']

# The x-axis is taken from the recorded history rather than the global
# ``epochs``: the two disagree when this cell is run after another experiment
# has changed ``epochs``, and plt.plot then fails on mismatched lengths.
epoch = range(1, len(loss_function) + 1)

plt.figure(figsize=(20, 5))
plt.title('Потери на обучающей и валидационной выборках')
plt.plot(epoch, loss_function, color='blue', label='Ошибка обучения (MSE)')
plt.plot(epoch, val_loss_function, color='red', label='Ошибка валидации (MSE)')
plt.xlabel('Эпоха')
plt.ylabel('Ошибка модели (MSE)')
plt.legend()
plt.show()

### Опыт №4

In [None]:
# Experiment 4: two hidden ReLU layers of 60 units each, batch size 5.
# Layer sizes, batch size and epoch count are meant to be tuned.
regres_model = Sequential()

# Only the first layer declares the input width; later layers take their
# input from the previous layer, so repeating input_dim there was redundant.
regres_model.add(Dense(60, activation='relu', input_dim=X_train.shape[1]))
regres_model.add(Dense(60, activation='relu'))
regres_model.add(Dense(1))

# metrics is passed as a list, the form documented for Model.compile;
# Keras 3 does not accept a bare string here.
regres_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

batch_size = 5
epochs = 200

regres_model_hist = regres_model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val)
)

In [None]:
# Score the fitted network on the held-out test split and record the MSE.
y_pred = regres_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse_results.append(mse)

print(f"MSE: {mse:f}")

In [None]:
# Actual vs predicted target values for the test split, indexed like y_test.
# np.ravel makes the predictions 1-D so they can be used as a single column.
result_df = pd.DataFrame({'Test': y_test, 'Pred': np.ravel(y_pred)})

result_df

In [None]:
# Learning curves: training and validation loss per recorded epoch.
loss_function = regres_model_hist.history['loss']
val_loss_function = regres_model_hist.history['val_loss']

# The x-axis is taken from the recorded history rather than the global
# ``epochs``: the two disagree when this cell is run after another experiment
# has changed ``epochs``, and plt.plot then fails on mismatched lengths.
epoch = range(1, len(loss_function) + 1)

plt.figure(figsize=(20, 5))
plt.title('Потери на обучающей и валидационной выборках')
plt.plot(epoch, loss_function, color='blue', label='Ошибка обучения (MSE)')
plt.plot(epoch, val_loss_function, color='red', label='Ошибка валидации (MSE)')
plt.xlabel('Эпоха')
plt.ylabel('Ошибка модели (MSE)')
plt.legend()
plt.show()

### Опыт №5

In [None]:
# Experiment 5: three hidden ReLU layers (120 -> 60 -> 30), 50 epochs.
# Layer sizes, batch size and epoch count are meant to be tuned.
regres_model = Sequential()

regres_model.add(Dense(120, activation='relu', input_dim=X_train.shape[1]))
regres_model.add(Dense(60, activation='relu'))
regres_model.add(Dense(30, activation='relu'))
regres_model.add(Dense(1))

# metrics is passed as a list, the form documented for Model.compile;
# Keras 3 does not accept a bare string here.
regres_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

batch_size = 10
epochs = 50

regres_model_hist = regres_model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val)
)

In [None]:
# Score the fitted network on the held-out test split and record the MSE.
y_pred = regres_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mse_results.append(mse)

print(f"MSE: {mse:f}")

In [None]:
# Actual vs predicted target values for the test split, indexed like y_test.
# np.ravel makes the predictions 1-D so they can be used as a single column.
result_df = pd.DataFrame({'Test': y_test, 'Pred': np.ravel(y_pred)})

result_df

In [None]:
# Learning curves: training and validation loss per recorded epoch.
loss_function = regres_model_hist.history['loss']
val_loss_function = regres_model_hist.history['val_loss']

# The x-axis is taken from the recorded history rather than the global
# ``epochs``: the two disagree when this cell is run after another experiment
# has changed ``epochs``, and plt.plot then fails on mismatched lengths.
epoch = range(1, len(loss_function) + 1)

plt.figure(figsize=(20, 5))
plt.title('Потери на обучающей и валидационной выборках')
plt.plot(epoch, loss_function, color='blue', label='Ошибка обучения (MSE)')
plt.plot(epoch, val_loss_function, color='red', label='Ошибка валидации (MSE)')
plt.xlabel('Эпоха')
plt.ylabel('Ошибка модели (MSE)')
plt.legend()
plt.show()

### Результаты опытов

In [None]:
# Recap: MSE and RMSE of every recorded run, numbered from 1 in list order.
for num, mse in enumerate(mse_results, start=1):
    print(f'Опыт №{num}')
    print('MSE:', mse)
    print('RMSE:', np.sqrt(mse))
    print()

# Классификация

In [None]:
# Read the slice table with pandas' default separator and encoding.
df_cl = pd.read_csv('./result_slice.csv')

# Remove the completely empty columns and the 'Unnamed: 0' column.
df_cl = df_cl.dropna(axis='columns', how='all').drop(columns='Unnamed: 0')

# Percentage of missing values per column, rounded to two decimals.
df_cl_nan = pd.DataFrame((df_cl.isnull().mean() * 100).round(2))

# Discard the columns where more than 40 % of the values are missing.
sparse_cols = df_cl_nan.loc[df_cl_nan[0] > 40].index.tolist()
df_cl = df_cl.drop(columns=sparse_cols)
df_cl

In [None]:
# Keep only the rows that have no missing values.
df_cl = df_cl.dropna()

# Move the EF_b column to the end of the table.
# DataFrame.append was removed in pandas 2.0, so the new column order is built
# directly from the column labels instead of appending frames.
df_cl = df_cl.reindex(columns=[c for c in df_cl.columns if c != 'EF_b'] + ['EF_b'])

In [None]:
# Binary target: 1.0 where EF_b is greater than 2, otherwise 0.0.
df_cl['EF_b_BIN'] = df_cl['EF_b'].gt(2).astype('float64')
df_cl

In [None]:
# Target is the binary EF_b_BIN column; features are all columns except
# DEPT, well, FZ and the two EF_b columns.
y = df_cl['EF_b_BIN']
X = df_cl.drop(columns=['DEPT', 'well', 'FZ', 'EF_b', 'EF_b_BIN'])

In [None]:
from sklearn.preprocessing import StandardScaler

# Split BEFORE scaling: fitting the scaler on the full X lets the mean and
# variance of the validation/test rows leak into the training features.
# The split sizes and seed are unchanged.
X_all_train, X_test, y_all_train, y_test = train_test_split(X, y, test_size=0.15, random_state=15)
X_train, X_val, y_train, y_val = train_test_split(X_all_train, y_all_train, test_size=0.2, random_state=15)

# Fit the scaler on the training rows only, then apply that same transform
# to every other split.
stdScaler = StandardScaler()
X_train = stdScaler.fit_transform(X_train)
X_val = stdScaler.transform(X_val)
X_test = stdScaler.transform(X_test)
X_all_train = stdScaler.transform(X_all_train)
X_train

In [None]:
# Activation values tried for this classifier: 'logistic', 'relu', 'tanh'.
from sklearn.neural_network import MLPClassifier

batch_size = 200
max_iter = 50

model = MLPClassifier(
    hidden_layer_sizes=(1,2),
    activation='tanh',
    early_stopping=True,
    max_iter=max_iter,
    batch_size=batch_size,
    # Fixed seed (same value as the data splits) so weight initialisation,
    # batch sampling and the early-stopping hold-out repeat between runs.
    random_state=15,
)

# fit() returns the fitted estimator itself, so hist_cl is the same object
# as model rather than a separate training-history record.
hist_cl = model.fit(X_train, y_train)

In [None]:
# Evaluate the classifier on the held-out test split.
y_pred = model.predict(X_test)

# classification_report is already imported in the notebook's import cell.
report = classification_report(y_test, y_pred)
print(report)