## IMPORTING LIBRARIES

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

## READING THE DATASET

In [None]:
# Load the training dataset from a CSV file in the working directory.
df = pd.read_csv("final_merged_file.csv")

In [None]:
# Display the freshly loaded training DataFrame for a quick visual inspection.
df

In [None]:
# Box plot of every column of the frame, to eyeball spread and outliers.
# The frame is passed by keyword: the first positional parameter of
# `sns.boxplot` is only `data` on newer seaborn releases, so `data=df`
# keeps the wide-form plot working regardless of the installed version.
sns.boxplot(data=df)

## FINDING THE CORRELATION BETWEEN THE COLUMNS

In [None]:
# Pairwise Pearson correlation (the pandas default method) between the columns.
# NOTE(review): on pandas >= 2.0 this raises if the frame contains non-numeric
# columns -- confirm that every column of the CSV is numeric.
df.corr(method="pearson")

In [None]:
# Pre-create the prediction column with a float placeholder; the training
# cell further down overwrites it with the fitted model's predictions.
df["el_power_predicted"] = 0.0

In [None]:
# List the column labels, which now include the placeholder prediction column.
df.columns

## TRAINING MODEL CREATION

### SIMPLE MODEL WITHOUT VALIDATION SET

In [None]:
# Fit a single-feature linear regression that predicts input voltage from
# electrical power, using every row of the frame (no hold-out split).
X = df[['el_power']]      # double brackets keep the feature as a 2-D frame
y = df['input_voltage']
model = LinearRegression().fit(X, y)   # fit() returns the estimator itself

# NOTE(review): despite its name, this column stores the model's predicted
# *input_voltage* values, not predicted electrical power.
df['el_power_predicted'] = model.predict(X)

print("Intercept is : ", model.intercept_)
print("Coefficient is : ", model.coef_)

In [None]:
# Display the frame again, now with the model's predictions stored in `el_power_predicted`.
df

### VISUALIZING IT AND CALCULATING RMSE AND R2 SCORE

In [None]:
# Scatter of the training data with seaborn's own fitted regression line,
# drawn on an explicitly created axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.regplot(data=df, x='el_power', y='input_voltage', color='green', ax=ax)
ax.set_title('Linear Regression: Input Voltage vs. Electrical Power')
ax.set_xlabel('Electrical Power')
ax.set_ylabel('Input Voltage')
plt.show()

In [None]:
# In-sample metrics: the model is scored on the same rows it was fitted on.
y_pred = model.predict(X)

mse = mean_squared_error(y, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y, y_pred)

print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R2): {r2}")

## TESTING THE ABOVE MODEL

In [None]:
# Load the separate test dataset from a CSV file in the working directory.
df1 = pd.read_csv("combined_test_dataset.csv")

### VISUALIZING AND CALCULATING RMSE AND R2 SCORE


In [None]:
# Score the currently fitted model on the separate test dataset.
X_test = df1[['el_power']]
y_test = df1['input_voltage']
y_pred_test = model.predict(X_test)

# Actual vs. predicted voltages; the predictions are also joined by a red line.
power_test = X_test['el_power']
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x=power_test, y=y_test, label='Actual', ax=ax)
sns.scatterplot(x=power_test, y=y_pred_test, label='Predicted', marker='x', ax=ax)
ax.plot(power_test, y_pred_test, color='red', linewidth=1)
ax.set_title('Linear Regression: Input Voltage vs. Electrical Power (Test Data)')
ax.set_xlabel('Electrical Power')
ax.set_ylabel('Input Voltage')
ax.legend()
plt.show()

# Error metrics on the test set.
mse_test = mean_squared_error(y_test, y_pred_test)
rmse_test = np.sqrt(mse_test)
print(f"Root Mean Squared Error (RMSE) for Test Data: {rmse_test}")

r2_test = r2_score(y_test, y_pred_test)
print(f"R-squared (R2) for Test Data: {r2_test}")

## RESULT

- Root Mean Squared Error (RMSE) for Test Data: 1.3409505044240877
- R-squared (R2) for Test Data: 0.7770276643417563
- Root Mean Squared Error (RMSE) for Training Data: 1.279181454280843
- R-squared (R2) for Training Data: 0.7832737656130924

## TRAINING THE MODEL WITH TRAIN AND VALIDATION SET

In [None]:
# Hold out 25% of the training frame as a validation set, refit the linear
# regression on the remaining 75%, and evaluate it on the held-out rows.
# `train_test_split` lives in sklearn.model_selection and must be imported in
# the notebook's import cell; without that import this cell raises NameError
# on a fresh kernel.
X = df[['el_power']]
y = df['input_voltage']
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.25, random_state=42  # fixed seed keeps the split reproducible
)

# NOTE: this rebinds `model`, so every cell executed after this one uses the
# estimator fitted on the training split only.
model = LinearRegression()
model.fit(X_train, y_train)

# Validation metrics.
y_pred_val = model.predict(X_val)
rmse_val = np.sqrt(mean_squared_error(y_val, y_pred_val))
r2_val = r2_score(y_val, y_pred_val)
print(f"Validation RMSE: {rmse_val}")
print(f"Validation R-squared: {r2_val}")

# Actual vs. predicted voltages on the validation rows; the predictions are
# also joined by a red line.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x=X_val['el_power'], y=y_val, label='Actual', ax=ax)
sns.scatterplot(x=X_val['el_power'], y=y_pred_val, label='Predicted', marker='x', ax=ax)
ax.plot(X_val['el_power'], y_pred_val, color='red', linewidth=1)
ax.set_title('Linear Regression: Input Voltage vs. Electrical Power (Validation Data)')
ax.set_xlabel('Electrical Power')
ax.set_ylabel('Input Voltage')
ax.legend()
plt.show()

## TESTING THE MODEL

In [None]:
# Re-run the test-set evaluation with whichever estimator `model` currently
# refers to (the one fitted on the training split when cells run in order).
X_test = df1[['el_power']]
y_test = df1['input_voltage']
y_pred_test = model.predict(X_test)

# Overlay actual and predicted voltages, plus a red line through the predictions.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(x=X_test['el_power'], y=y_test, label='Actual', ax=ax)
sns.scatterplot(x=X_test['el_power'], y=y_pred_test, label='Predicted', marker='x', ax=ax)
ax.plot(X_test['el_power'], y_pred_test, color='red', linewidth=1)
ax.set(
    title='Linear Regression: Input Voltage vs. Electrical Power (Test Data)',
    xlabel='Electrical Power',
    ylabel='Input Voltage',
)
ax.legend()
plt.show()

# Compute both metrics first, then report them in the original order.
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
r2_test = r2_score(y_test, y_pred_test)
print(f"Root Mean Squared Error (RMSE) for Test Data: {rmse_test}")
print(f"R-squared (R2) for Test Data: {r2_test}")

## RESULTS

Root Mean Squared Error (RMSE) for Test Data: 1.3410588636671728

R-squared (R2) for Test Data: 0.776991627083641

Validation RMSE: 1.267224998824096

Validation R-squared: 0.7871424058520148

## CONCLUSION

THE ABOVE MODEL, TRAINED BOTH WITH AND WITHOUT A VALIDATION SET, PERFORMS POORLY IN COMPARISON TO OTHER MODELS (TEST R2 ≈ 0.78, TEST RMSE ≈ 1.34); HENCE IT SHOULD BE DISREGARDED AND NOT COUNTED.