In [27]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import psycopg2
from config import db_password
# connection function
def get_db_connection():
    conn = psycopg2.connect(database="N-Butane", user="postgres", password=db_password, host="127.0.0.1", port="5432")
    # will need to create a database, and make a config.py to hold the password 
    return conn

conn = get_db_connection()
cursor = conn.cursor()

# establish connect with postgres database to extract data
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute("select * from project_data")
result = cursor.fetchall()

df = pd.DataFrame(result)

from feature_names import column_names

df.columns = column_names
df.head()



Unnamed: 0,Date,N_Butane,TB_Recycle_Charge,NorthTC_Charge,North_Flush_Pressure,North_Out_Temp,SouthTA_Recycle_Charge,SouthTC_Charge,South_Flush_Pressure,South_Out_Temp,...,TC_Tray7_Temp,TC_Tray40_Temp,TC_Charge_Temp,TC_Overhead_Pressure,TC_Overhead_Reciever_Temp,TC_Tray24_Temp,TC_Differential_Pressure,TC_Bottom_Pressure,TC_Reboil_Out_Temp,TC_Reboil_Condensor_lbs
0,2022-07-15 13:00:00,11.2,2129.65,2675.76,154.651,98.2989,6399.56,2656.33,143.764,101.069,...,189.015,200.231,185.963,225.736,171.159,171.122,3.4748,229.263,201.867,7962.54
1,2022-07-15 06:00:00,13.9,2272.0,2525.6,154.997,94.0955,6395.5,2514.06,145.534,97.1518,...,188.525,200.026,183.01,225.408,170.034,166.027,3.4974,228.926,201.667,8235.4
2,2022-07-14 13:00:00,13.6,2201.02,2205.66,154.588,97.2448,5933.97,2414.58,145.156,101.6,...,189.107,200.47,184.995,225.51,170.889,172.315,3.25573,228.813,202.1,7684.43
3,2022-07-14 06:00:00,11.0,2298.88,2419.37,153.091,94.8862,5902.11,2375.45,143.621,97.6705,...,189.208,200.854,183.829,227.578,168.274,167.6,3.34195,230.949,202.448,7940.17
4,2022-07-13 13:00:00,10.7,2269.17,2569.28,149.635,99.7484,5749.2,2567.08,141.749,102.513,...,188.841,200.382,185.034,226.514,168.065,172.359,3.21866,229.778,202.281,7591.85


In [40]:
# Separate the Features (X) from the Target (y)
y = df["N_Butane"]
X = df.drop(["N_Butane", "Date"], axis = 1)

# Split into train data and test data
from cmath import sqrt
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,random_state=0)

# Import Linear Regression Model

model = LinearRegression()

# Train the model
model.fit(X_train, y_train)

# Make Predictions
y_pred = model.predict(X_test)
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test}).reset_index(drop=True)


# Calculated Metrics
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import explained_variance_score as evs

# r2 score
regression_score = r2_score(y_test, y_pred)

# Mean Squared Error (MSE)
mean_sq_error = mse(y_test, y_pred)

mean_error = sqrt(mean_sq_error).real

# Mean Absolute Error (MAE)
mean_abs_error = mae(y_test, y_pred)

# explained variance score
ev_score = evs(y_test, y_pred)


print(regression_score)
print(mean_sq_error)
print(mean_error)
print(mean_abs_error)
print(ev_score)

0.43522916507833964
6.7309024288643
2.594398278766061
1.7541622371769077
0.4377638431515226


In [41]:
metrics = pd.DataFrame({
    "R2_Score": [regression_score],
    "Mean_Error": [mean_error],
    "Mean_Absolute_Error": [mean_abs_error],
    "Estimated_Variance_Score": [ev_score]
})

metrics

Unnamed: 0,R2_Score,Mean_Error,Mean_Absolute_Error,Estimated_Variance_Score
0,0.435229,2.594398,1.754162,0.437764


In [42]:
metrics.dtypes

R2_Score                    float64
Mean_Error                  float64
Mean_Absolute_Error         float64
Estimated_Variance_Score    float64
dtype: object

In [30]:
results.head()

Unnamed: 0,Prediction,Actual
0,15.760575,17.0
1,12.537719,15.1
2,10.093043,10.4
3,13.446155,14.2
4,15.666933,14.5


In [36]:
results.dtypes

Prediction    float64
Actual        float64
dtype: object

In [31]:
# create new metric table in N-Butane database
try:
    cursor.execute("CREATE TABLE proj_data_metrics (R2_Score real, Mean_Error real, Mean_Absolute_Error real, Estimated_Variance_Score real);")
except:
    print("I can't make the database!")

conn.commit() 

I can't make the database!


In [32]:
# create new results table in N-Butane database
try:
    cursor.execute("CREATE TABLE proj_data_results (Predictions real, Actual real);")
except:
    print("I can't make the database!")

conn.commit() 

I can't make the database!


In [43]:
# results and metrics to postgresql table
from sqlalchemy import create_engine


connect = f"postgresql+psycopg2://postgres:" + db_password + "@127.0.0.1:5432/N-Butane" 
engine = create_engine(connect)

results.to_sql('proj_data_results', con=engine, if_exists='replace', index=False)
metrics.to_sql('proj_data_metrics', con=engine, if_exists='replace', index=False)
conn.autocommit=True