# Solar Linear Regression Model
### This notebook uses a scikit-learn linear regression model to fit the given solar data



#### Installing Dependencies and importing libraries

In [1]:
# Installing Dependencies
%pip install -U scikit-learn
%pip install -U pandas





[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.3.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# Importing matplotlib, numpy and pandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
# importing scikit learn models and error models
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

#### Importing solar csv datasets

In [4]:
# Load the Plant 1 generation and weather-sensor CSV files into pandas DataFrames.
# The paths are relative to the notebook's working directory: a FileNotFoundError
# from this cell means DATA_DIR does not point at the folder holding the CSVs.
DATA_DIR = "./solar_dataset"  # single place to change if the dataset lives elsewhere
generation_data_1 = pd.read_csv(f"{DATA_DIR}/Plant_1_Generation_Data.csv")
sensor_data_1 = pd.read_csv(f"{DATA_DIR}/Plant_1_Weather_Sensor_Data.csv")

FileNotFoundError: [Errno 2] No such file or directory: './solar_dataset/Plant_1_Generation_Data.csv'

#### Fixing the mismatched DATE_TIME formats

In [None]:
# Re-encode the generation DATE_TIME strings from 'day-month-year hour:minute'
# to 'year-month-day hour:minute:second' so they can be matched against the
# sensor data (presumably the sensor CSV already uses that layout -- confirm).
# NOTE: the column is overwritten in place, so re-running this cell without
# reloading the CSV would try to parse already-converted strings with the old format.
generation_timestamps = pd.to_datetime(
    generation_data_1['DATE_TIME'],
    format='%d-%m-%Y %H:%M',
)
generation_data_1['DATE_TIME'] = generation_timestamps.dt.strftime('%Y-%m-%d %H:%M:%S')

#### Merging data frames on DATE_TIME attribute

In [None]:
# Join generation rows with sensor rows that carry the same DATE_TIME value.
# how='inner' is pandas' default, spelled out here: unmatched rows are dropped.
merged_data = pd.merge(
    generation_data_1,
    sensor_data_1,
    how='inner',
    on='DATE_TIME',
)

merged_data

#### Removing rows with zero irradiation

In [None]:
# Boolean mask: True for rows whose IRRADIATION is strictly greater than zero
condition = merged_data['IRRADIATION'].gt(0.0)
# Keep only the rows selected by the mask
filtered_data = merged_data.loc[condition]

In [None]:
# Columns to show when previewing the filtered data
selected_columns = [
    'DATE_TIME',
    'DC_POWER',
    'AC_POWER',
    'DAILY_YIELD',
    'TOTAL_YIELD',
    'AMBIENT_TEMPERATURE',
    'MODULE_TEMPERATURE',
    'IRRADIATION',
]

# Display rows at positions 100..149 of the filtered frame, restricted to those columns
filtered_data.loc[:, selected_columns].iloc[100:150]

## Scikit Learn Model setup

#### Setting up arrays for scikit-learn

In [None]:
# Feature matrix X: the single IRRADIATION column, reshaped to (n_rows, 1)
# because scikit-learn estimators expect a 2-D feature array.
irradiation = filtered_data['IRRADIATION'].to_numpy()
X = irradiation.reshape(-1, 1)

# Target vector y: the AC_POWER column as a 1-D array
y = filtered_data['AC_POWER'].to_numpy()

#### Splitting the data into training and test sets

In [None]:
# Randomly partition the samples into a training part and a testing part
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,    # hold out 20% of the rows for testing
    random_state=42,  # fixed seed so the split is the same on every run
)

In [None]:
# Peek at the first few training samples. A notebook cell only renders its
# last expression, so both arrays are returned together as a single tuple
# (a bare `X_train` on its own line above `y_train` would display nothing).
X_train[:5], y_train[:5]

#### Training the model with the data

In [None]:
# Instantiate a scikit-learn LinearRegression estimator and train it on the
# training split; fit() returns the fitted estimator itself.
model = LinearRegression().fit(X_train, y_train)
model

#### Predicting the power from trained model

In [None]:
# Use the fitted model to predict the target for every held-out test sample
y_pred = model.predict(X=X_test)

#### Evaluating the accuracy of the model

In [None]:
# Mean squared error between the true test targets and the predictions
# (0 would be a perfect fit; larger values mean larger errors)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}')

#### Plotting the regression using matplotlib

In [None]:
# Plot the held-out observations together with the fitted regression line.
fig, ax = plt.subplots()
ax.scatter(X_test, y_test, s=5, color="black", label="Test data")

# Draw the line through the predictions ordered by feature value, so it is
# traced once from left to right instead of zig-zagging over itself.
order = np.argsort(X_test[:, 0])
ax.plot(X_test[order, 0], y_pred[order], color="blue", linewidth=3, label="Linear regression fit")

# Label the axes with the source column names so the figure stands on its own
ax.set_xlabel("IRRADIATION")
ax.set_ylabel("AC_POWER")
ax.set_title("Linear regression of AC_POWER on IRRADIATION (test set)")
ax.legend();  # trailing ';' keeps the Legend repr out of the cell output