In [1]:
import pandas as pd

# Read production data from Excel file
df= pd.read_excel("production data.xlsx")
df.head()



Unnamed: 0,Years,G-1.6 Meters,G-4 Meters,Total
0,1975,2953,0,2953
1,1976,29480,0,29480
2,1977,48930,0,48930
3,1978,62850,0,62850
4,1979,98750,0,98750


In [2]:
df.shape

(47, 4)

In [3]:
# Convert 'Years' column to datetime format
df['Years'] = pd.to_datetime(df['Years'], format='%Y')

# Generate a complete list of months from January 1975 to December 2021
all_months = pd.date_range(start='1975-01-01', end='2021-12-01', freq='MS')

# Create a DataFrame with all months
all_years = pd.DataFrame(all_months, columns=['Month'])

# Left merge the all_months DataFrame with the original dataset
monthly_df = pd.merge(all_years, df, how='left', left_on='Month', right_on='Years')

# Drop the 'Years' column
monthly_df.drop(columns='Years', inplace=True)

# Forward-fill NaN values with the corresponding production data
monthly_df.ffill(inplace=True)

# Calculate monthly production by dividing annual production by 12
monthly_df['Monthly Production (G-1.6)'] = monthly_df['G-1.6 Meters'] / 12
monthly_df['Monthly Production (G-4)'] = monthly_df['G-4 Meters'] / 12

# Sum up the monthly production for each meter type to get the monthly total production
monthly_df['Monthly Total Production'] = monthly_df['Monthly Production (G-1.6)'] + monthly_df['Monthly Production (G-4)']

# Save to Excel
output_file = 'expanded_monthly_production.xlsx'
monthly_df.to_excel(output_file, index=False)

print("Excel file saved successfully!")


Excel file saved successfully!


In [4]:
monthly_df.head()

Unnamed: 0,Month,G-1.6 Meters,G-4 Meters,Total,Monthly Production (G-1.6),Monthly Production (G-4),Monthly Total Production
0,1975-01-01,2953.0,0.0,2953.0,246.083333,0.0,246.083333
1,1975-02-01,2953.0,0.0,2953.0,246.083333,0.0,246.083333
2,1975-03-01,2953.0,0.0,2953.0,246.083333,0.0,246.083333
3,1975-04-01,2953.0,0.0,2953.0,246.083333,0.0,246.083333
4,1975-05-01,2953.0,0.0,2953.0,246.083333,0.0,246.083333


In [5]:
monthly_df.shape

(564, 7)

Now the data contains 564 rows and 7 columns

In [6]:
monthly_df.columns

Index(['Month', 'G-1.6 Meters', 'G-4 Meters', 'Total',
       'Monthly Production (G-1.6)', 'Monthly Production (G-4)',
       'Monthly Total Production'],
      dtype='object')

In [7]:
monthly_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 564 entries, 0 to 563
Data columns (total 7 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   Month                       564 non-null    datetime64[ns]
 1   G-1.6 Meters                564 non-null    float64       
 2   G-4 Meters                  564 non-null    float64       
 3   Total                       564 non-null    float64       
 4   Monthly Production (G-1.6)  564 non-null    float64       
 5   Monthly Production (G-4)    564 non-null    float64       
 6   Monthly Total Production    564 non-null    float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 31.0 KB


In [8]:
monthly_df.describe()

Unnamed: 0,Month,G-1.6 Meters,G-4 Meters,Total,Monthly Production (G-1.6),Monthly Production (G-4),Monthly Total Production
count,564,564.0,564.0,564.0,564.0,564.0,564.0
mean,1998-06-16 14:02:33.191489408,139942.340426,185764.340426,325706.680851,11661.861702,15480.361702,27142.223404
min,1975-01-01 00:00:00,2953.0,0.0,2953.0,246.083333,0.0,246.083333
25%,1986-09-23 12:00:00,98750.0,0.0,101577.0,8229.166667,0.0,8464.75
50%,1998-06-16 00:00:00,136000.0,176000.0,303750.0,11333.333333,14666.666667,25312.5
75%,2010-03-08 18:00:00,184260.0,321200.0,503840.0,15355.0,26766.666667,41986.666667
max,2021-12-01 00:00:00,284660.0,625000.0,851460.0,23721.666667,52083.333333,70955.0
std,,62853.674752,171551.688193,223279.394683,5237.806229,14295.974016,18606.616224


In [9]:
duplicate_rows_monthly_df = monthly_df[monthly_df.duplicated()]

In [10]:
print("\nDuplicate Rows:")
duplicate_rows_monthly_df


Duplicate Rows:


Unnamed: 0,Month,G-1.6 Meters,G-4 Meters,Total,Monthly Production (G-1.6),Monthly Production (G-4),Monthly Total Production


In [11]:
pip install matplotlib seaborn


Note: you may need to restart the kernel to use updated packages.


In [12]:
pip install --upgrade matplotlib seaborn


Collecting matplotlib
  Obtaining dependency information for matplotlib from https://files.pythonhosted.org/packages/3c/a5/54a497ca4af8e76adfe7c5a1712f3bb6b2222d464fe736b60aaafd425945/matplotlib-3.9.1-cp311-cp311-win_amd64.whl.metadata
  Downloading matplotlib-3.9.1-cp311-cp311-win_amd64.whl.metadata (11 kB)
Downloading matplotlib-3.9.1-cp311-cp311-win_amd64.whl (8.0 MB)
   ---------------------------------------- 0.0/8.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/8.0 MB 435.7 kB/s eta 0:00:19
    --------------------------------------- 0.1/8.0 MB 1.1 MB/s eta 0:00:08
    --------------------------------------- 0.2/8.0 MB 1.2 MB/s eta 0:00:07
   - -------------------------------------- 0.3/8.0 MB 1.5 MB/s eta 0:00:06
   - -------------------------------------- 0.4/8.0 MB 1.6 MB/s eta 0:00:05
   -- -------------------------------

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'c:\\users\\shassan\\appdata\\local\\anaconda3\\lib\\site-packages\\matplotlib\\backends\\_backend_agg.cp311-win_amd64.pyd'
Consider using the `--user` option or check the permissions.



In [13]:
import matplotlib.pyplot as plt


# Extracting data for G-1.6 Meters
months = monthly_df['Month']
g_1_6_meters = monthly_df['Monthly Production (G-1.6)']

# Creating the line plot for G-1.6 Meters
plt.figure(figsize=(10, 6))
plt.plot(months, g_1_6_meters, marker='o', color='blue', label='G-1.6 Meters')

# Adding labels and title
plt.xlabel('Month')
plt.ylabel('Production of G-1.6 Meters')
plt.title('Production of G-1.6 Meters Over Time')
plt.legend()

# Show plot
plt.xticks(rotation=45)
plt.tight_layout()
plt.grid(True)
plt.show()


ImportError: DLL load failed while importing _c_internal_utils: The specified module could not be found.

In [None]:
import matplotlib.pyplot as plt


# Extracting data for G-1.6 Meters
months = monthly_df['Month']
g_4_meters = monthly_df['Monthly Production (G-4)']

# Creating the line plot for G-1.6 Meters
plt.figure(figsize=(10, 6))
plt.plot(months, g_4_meters, marker='o', color='green', label='G-4 Meters')

# Adding labels and title
plt.xlabel('Month')
plt.ylabel('Production of G-4 Meters')
plt.title('Production of G-4 Meters Over Time')
plt.legend()

# Show plot
plt.xticks(rotation=45)
plt.tight_layout()
plt.grid(True)
plt.show()

In [None]:
import matplotlib.pyplot as plt


# Extracting data for G-1.6 Meters
months = monthly_df['Month']
total_production = monthly_df['Monthly Total Production']

# Creating the line plot for G-1.6 Meters
plt.figure(figsize=(10, 6))
plt.plot(months, total_production, marker='o', color='purple', label='Total Production')

# Adding labels and title
plt.xlabel('Month')
plt.ylabel('Total Production')
plt.title('Total Production of Meters Over Time')
plt.legend()

# Show plot
plt.xticks(rotation=45)
plt.tight_layout()
plt.grid(True)
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Assuming your DataFrame monthly_df looks like this:
# monthly_df = pd.DataFrame({
#     'Years': ['Year 1', 'Year 2', 'Year 3', 'Year 4', 'Year 5'],
#     'G-1.6 Meters': [10, 15, 20, 25, 30],
#     'G-4 Meters': [5, 10, 15, 20, 25]
# })

# Extracting total production of G-1.6 Meters and G-4 Meters
total_g_1_6 = monthly_df['Monthly Production (G-1.6)'].sum()
total_g_4 = monthly_df['Monthly Production (G-4)'].sum()

# Data for the pie chart
labels = ['G-1.6 Meters', 'G-4 Meters']
sizes = [total_g_1_6, total_g_4]
colors = ['blue', 'red']
explode = (0.1, 0)  # explode 1st slice

# Creating the pie chart
plt.figure(figsize=(8, 6))
plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%', shadow=True, startangle=140)
plt.title('Production Comparison: G-1.6 Meters vs G-4 Meters')

# Show plot
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.show()


# Linear Regression

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

# Assuming 'df' is your DataFrame containing production data
# Assuming 'df' has columns 'MONTH' and 'G-1.6 METERS' and 'G-4 METERS'
# If not, replace 'MONTH', 'G-1.6 METERS', and 'G-4 METERS' with your actual column names

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']]  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)']  # Target variable (production of G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']]  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)']  # Target variable (production of G-4 meters)

# Normalization for G-1.6 METERS
scaler_1_6 = StandardScaler()
X_normalized_1_6 = scaler_1_6.fit_transform(X_1_6)

# Normalization for G-4 METERS
scaler_4 = StandardScaler()
X_normalized_4 = scaler_4.fit_transform(X_4)

# Splitting the data into training and testing sets for G-1.6 METERS
X_train_1_6, X_test_1_6, y_train_1_6, y_test_1_6 = train_test_split(X_normalized_1_6, y_1_6, test_size=0.2, random_state=42)

# Splitting the data into training and testing sets for G-4 METERS
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(X_normalized_4, y_4, test_size=0.2, random_state=42)

# Training the model for G-1.6 METERS
model_1_6 = LinearRegression()
model_1_6.fit(X_train_1_6, y_train_1_6)

# Training the model for G-4 METERS
model_4 = LinearRegression()
model_4.fit(X_train_4, y_train_4)

# Predictions for G-1.6 METERS
y_pred_1_6 = model_1_6.predict(X_test_1_6)

# Predictions for G-4 METERS
y_pred_4 = model_4.predict(X_test_4)

# Evaluating the model for G-1.6 METERS
mae_1_6 = mean_absolute_error(y_test_1_6, y_pred_1_6)
print("Mean Absolute Error for G-1.6 METERS:", mae_1_6)
# Interpretation and Insights for G-1.6 METERS
coefficients_1_6 = pd.DataFrame({'Feature': X_1_6.columns, 'Coefficient': model_1_6.coef_})
print(coefficients_1_6)

# Calculating R-squared for G-1.6 METERS
r2_1_6 = r2_score(y_test_1_6, y_pred_1_6)
print("R-squared for G-1.6 METERS:", r2_1_6)

# Evaluating the model for G-4 METERS
mae_4 = mean_absolute_error(y_test_4, y_pred_4)
print("Mean Absolute Error for G-4 METERS:", mae_4)
# Interpretation and Insights for G-4 METERS
coefficients_4 = pd.DataFrame({'Feature': X_4.columns, 'Coefficient': model_4.coef_})
print(coefficients_4)

# Calculating R-squared for G-4 METERS
r2_4 = r2_score(y_test_4, y_pred_4)
print("R-squared for G-4 METERS:", r2_4)


# Demand forcasting


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

# Assuming 'monthly_df' is your DataFrame containing demand data
# Assuming 'monthly_df' has columns 'Month' and 'Monthly Production (G-4)'
# If not, replace 'Month' and 'Monthly Production (G-4)' with your actual column names

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']]  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)']  # Target variable (monthly production of G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']]  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)']  # Target variable (monthly production of G-4 meters)

# Normalization for G-1.6 METERS
scaler_1_6 = StandardScaler()
X_normalized_1_6 = scaler_1_6.fit_transform(X_1_6)

# Normalization for G-4 METERS
scaler_4 = StandardScaler()
X_normalized_4 = scaler_4.fit_transform(X_4)

# Splitting the data into training and testing sets for G-1.6 METERS
X_train_1_6, X_test_1_6, y_train_1_6, y_test_1_6 = train_test_split(X_normalized_1_6, y_1_6, test_size=0.2, random_state=42)

# Splitting the data into training and testing sets for G-4 METERS
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(X_normalized_4, y_4, test_size=0.2, random_state=42)

# Training the model for G-1.6 METERS
model_1_6 = LinearRegression()
model_1_6.fit(X_train_1_6, y_train_1_6)

# Training the model for G-4 METERS
model_4 = LinearRegression()
model_4.fit(X_train_4, y_train_4)

# Predictions for G-1.6 METERS
y_pred_1_6 = model_1_6.predict(X_test_1_6)

# Predictions for G-4 METERS
y_pred_4 = model_4.predict(X_test_4)

# Evaluating the model for G-1.6 METERS
mae_1_6 = mean_absolute_error(y_test_1_6, y_pred_1_6)
print("Mean Absolute Error for G-1.6 METERS:", mae_1_6)
# Interpretation and Insights for G-1.6 METERS
coefficients_1_6 = pd.DataFrame({'Feature': X_1_6.columns, 'Coefficient': model_1_6.coef_})
print(coefficients_1_6)

# Evaluating the model for G-4 METERS
mae_4 = mean_absolute_error(y_test_4, y_pred_4)
print("Mean Absolute Error for G-4 METERS:", mae_4)
# Interpretation and Insights for G-4 METERS
coefficients_4 = pd.DataFrame({'Feature': X_4.columns, 'Coefficient': model_4.coef_})
print(coefficients_4)

# Future demand forecasting for G-1.6 METERS
future_months = pd.date_range(start='2024-08-01', periods=2, freq='M')
future_months_numeric = (future_months - pd.to_datetime('2024-01-01')).days
predicted_demand_1_6 = model_1_6.predict(future_months_numeric.values.reshape(-1, 1))
future_demand_1_6 = pd.DataFrame({'Month': future_months, 'Predicted Demand (G-1.6)': predicted_demand_1_6})
print(future_demand_1_6)

# Future demand forecasting for G-4 METERS
predicted_demand_4 = model_4.predict(future_months_numeric.values.reshape(-1, 1))
future_demand_4 = pd.DataFrame({'Month': future_months, 'Predicted Demand (G-4)': predicted_demand_4})
print(future_demand_4)


# SVR

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, r2_score

# Assuming 'monthly_df' is your DataFrame containing demand data
# Assuming 'monthly_df' has columns 'Month' and 'Monthly Production (G-1.6)' and 'Monthly Production (G-4)'
# If not, replace 'Month' and 'Monthly Production (G-1.6)' and 'Monthly Production (G-4)' with your actual column names

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']]  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)']  # Target variable (monthly production of G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']]  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)']  # Target variable (monthly production of G-4 meters)

# Train-Test Split for G-1.6 METERS
X_train_1_6, X_test_1_6, y_train_1_6, y_test_1_6 = train_test_split(X_1_6, y_1_6, test_size=0.2, random_state=42)

# Train-Test Split for G-4 METERS
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(X_4, y_4, test_size=0.2, random_state=42)

# Feature Scaling for G-1.6 METERS
scaler_1_6 = StandardScaler()
X_train_scaled_1_6 = scaler_1_6.fit_transform(X_train_1_6)
X_test_scaled_1_6 = scaler_1_6.transform(X_test_1_6)

# Feature Scaling for G-4 METERS
scaler_4 = StandardScaler()
X_train_scaled_4 = scaler_4.fit_transform(X_train_4)
X_test_scaled_4 = scaler_4.transform(X_test_4)

# SVR Model for G-1.6 METERS
svr_1_6 = SVR(kernel='rbf')  # Radial Basis Function (RBF) kernel is commonly used
svr_1_6.fit(X_train_scaled_1_6, y_train_1_6)

# SVR Model for G-4 METERS
svr_4 = SVR(kernel='rbf')  # Radial Basis Function (RBF) kernel is commonly used
svr_4.fit(X_train_scaled_4, y_train_4)

# Predictions for G-1.6 METERS
y_pred_1_6 = svr_1_6.predict(X_test_scaled_1_6)

# Predictions for G-4 METERS
y_pred_4 = svr_4.predict(X_test_scaled_4)

# Model Evaluation for G-1.6 METERS
mae_1_6 = mean_absolute_error(y_test_1_6, y_pred_1_6)
print("Mean Absolute Error for G-1.6 METERS:", mae_1_6)

# R-squared for G-1.6 METERS
r2_1_6 = svr_1_6.score(X_test_scaled_1_6, y_test_1_6)
print("R-squared for G-1.6 METERS:", r2_1_6)

# Model Evaluation for G-4 METERS
mae_4 = mean_absolute_error(y_test_4, y_pred_4)
print("Mean Absolute Error for G-4 METERS:", mae_4)

# R-squared for G-4 METERS
r2_4 = svr_4.score(X_test_scaled_4, y_test_4)
print("R-squared for G-4 METERS:", r2_4)


# Demand forcasting

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

# Assuming 'monthly_df' is your DataFrame containing demand data
# Assuming 'monthly_df' has columns 'Month' and 'Monthly Production (G-4)'
# If not, replace 'Month' and 'Monthly Production (G-4)' with your actual column names

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']]  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)']  # Target variable (monthly production of G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']]  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)']  # Target variable (monthly production of G-4 meters)

# Splitting the data into training and testing sets for G-1.6 METERS
X_train_1_6, X_test_1_6, y_train_1_6, y_test_1_6 = train_test_split(X_1_6, y_1_6, test_size=0.2, random_state=42)

# Splitting the data into training and testing sets for G-4 METERS
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(X_4, y_4, test_size=0.2, random_state=42)

# SVR Model for G-1.6 METERS
svr_1_6 = SVR(kernel='rbf')  # Radial Basis Function (RBF) kernel is commonly used
svr_1_6.fit(X_train_1_6, y_train_1_6)

# SVR Model for G-4 METERS
svr_4 = SVR(kernel='rbf')  # Radial Basis Function (RBF) kernel is commonly used
svr_4.fit(X_train_4, y_train_4)

# Predictions for G-1.6 METERS
y_pred_1_6 = svr_1_6.predict(X_test_1_6)

# Predictions for G-4 METERS
y_pred_4 = svr_4.predict(X_test_4)

# Evaluating the model for G-1.6 METERS
mae_1_6 = mean_absolute_error(y_test_1_6, y_pred_1_6)
print("Mean Absolute Error for G-1.6 METERS:", mae_1_6)

# Future demand forecasting for G-1.6 METERS
future_months = pd.date_range(start='2023-08-01', periods=2, freq='M')
future_months_numeric = (future_months - pd.to_datetime('2023-01-01')).days
predicted_demand_1_6 = svr_1_6.predict(future_months_numeric.values.reshape(-1, 1))
future_demand_1_6 = pd.DataFrame({'Month': future_months, 'Predicted Demand (G-1.6)': predicted_demand_1_6})
print(future_demand_1_6)

# Evaluating the model for G-4 METERS
mae_4 = mean_absolute_error(y_test_4, y_pred_4)
print("Mean Absolute Error for G-4 METERS:", mae_4)

# Future demand forecasting for G-4 METERS
predicted_demand_4 = svr_4.predict(future_months_numeric.values.reshape(-1, 1))
future_demand_4 = pd.DataFrame({'Month': future_months, 'Predicted Demand (G-4)': predicted_demand_4})
print(future_demand_4)


# Random Forest

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

# Assuming 'monthly_df' is your DataFrame containing demand data
# Assuming 'monthly_df' has columns 'Month' and 'Monthly Production (G-1.6)' and 'Monthly Production (G-4)'
# If not, replace 'Month' and 'Monthly Production (G-1.6)' and 'Monthly Production (G-4)' with your actual column names

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']]  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)']  # Target variable (monthly production of G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']]  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)']  # Target variable (monthly production of G-4 meters)

# Train-Test Split for G-1.6 METERS
X_train_1_6, X_test_1_6, y_train_1_6, y_test_1_6 = train_test_split(X_1_6, y_1_6, test_size=0.2, random_state=42)

# Train-Test Split for G-4 METERS
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(X_4, y_4, test_size=0.2, random_state=42)

# Random Forest Regression Model for G-1.6 METERS
rf_regressor_1_6 = RandomForestRegressor(n_estimators=100, random_state=42)  # You can adjust the number of trees (n_estimators)
rf_regressor_1_6.fit(X_train_1_6, y_train_1_6)

# Random Forest Regression Model for G-4 METERS
rf_regressor_4 = RandomForestRegressor(n_estimators=100, random_state=42)  # You can adjust the number of trees (n_estimators)
rf_regressor_4.fit(X_train_4, y_train_4)

# Predictions for G-1.6 METERS
y_pred_1_6 = rf_regressor_1_6.predict(X_test_1_6)

# Predictions for G-4 METERS
y_pred_4 = rf_regressor_4.predict(X_test_4)

# Model Evaluation for G-1.6 METERS
mae_1_6 = mean_absolute_error(y_test_1_6, y_pred_1_6)
print("Mean Absolute Error for G-1.6 METERS:", mae_1_6)

# R-squared for G-1.6 METERS
r2_1_6 = r2_score(y_test_1_6, y_pred_1_6)
print("R-squared for G-1.6 METERS:", r2_1_6)

# Model Evaluation for G-4 METERS
mae_4 = mean_absolute_error(y_test_4, y_pred_4)
print("Mean Absolute Error for G-4 METERS:", mae_4)

# R-squared for G-4 METERS
r2_4 = r2_score(y_test_4, y_pred_4)
print("R-squared for G-4 METERS:", r2_4)


# Demand Forecasting

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# Assuming 'monthly_df' is your DataFrame containing demand data
# Assuming 'monthly_df' has columns 'Month', 'G-1.6 METERS', and 'G-4 METERS'
# If not, replace 'Month', 'G-1.6 METERS', and 'G-4 METERS' with your actual column names

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']]  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)']  # Target variable (demand for G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']]  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)']  # Target variable (demand for G-4 meters)

# Train-Test Split for G-1.6 METERS
X_train_1_6, X_test_1_6, y_train_1_6, y_test_1_6 = train_test_split(X_1_6, y_1_6, test_size=0.2, random_state=42)

# Train-Test Split for G-4 METERS
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(X_4, y_4, test_size=0.2, random_state=42)

# Random Forest Regression Model for G-1.6 METERS
rf_regressor_1_6 = RandomForestRegressor(n_estimators=100, random_state=42)  # You can adjust the number of trees (n_estimators)
rf_regressor_1_6.fit(X_train_1_6, y_train_1_6)

# Random Forest Regression Model for G-4 METERS
rf_regressor_4 = RandomForestRegressor(n_estimators=100, random_state=42)  # You can adjust the number of trees (n_estimators)
rf_regressor_4.fit(X_train_4, y_train_4)

# Predictions for G-1.6 METERS
y_pred_1_6 = rf_regressor_1_6.predict(X_test_1_6)

# Predictions for G-4 METERS
y_pred_4 = rf_regressor_4.predict(X_test_4)

# Calculate MAE and R2 for G-1.6 METERS
mae_1_6 = mean_absolute_error(y_test_1_6, y_pred_1_6)
r2_1_6 = r2_score(y_test_1_6, y_pred_1_6)
print("MAE for G-1.6 METERS:", mae_1_6)
print("R-squared for G-1.6 METERS:", r2_1_6)

# Calculate MAE and R2 for G-4 METERS
mae_4 = mean_absolute_error(y_test_4, y_pred_4)
r2_4 = r2_score(y_test_4, y_pred_4)
print("MAE for G-4 METERS:", mae_4)
print("R-squared for G-4 METERS:", r2_4)

# Future demand forecasting for G-1.6 METERS
future_months_1_6 = pd.date_range(start='2022-01-01', periods=2, freq='M')
future_months_numeric_1_6 = (future_months_1_6 - pd.to_datetime('2022-01-01')).days
predicted_demand_1_6 = rf_regressor_1_6.predict(future_months_numeric_1_6.values.reshape(-1, 1))
future_demand_1_6 = pd.DataFrame({'Month': future_months_1_6, 'Predicted Demand (G-1.6)': predicted_demand_1_6})
print(future_demand_1_6)

# Future demand forecasting for G-4 METERS
future_months_4 = pd.date_range(start='2023-08-01', periods=2, freq='M')
future_months_numeric_4 = (future_months_4 - pd.to_datetime('2023-01-01')).days
predicted_demand_4 = rf_regressor_4.predict(future_months_numeric_4.values.reshape(-1, 1))
future_demand_4 = pd.DataFrame({'Month': future_months_4, 'Predicted Demand (G-4)': predicted_demand_4})
print(future_demand_4)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score

# Assuming 'monthly_df' is your DataFrame containing demand data
# Assuming 'monthly_df' has columns 'Month', 'G-1.6 METERS', and 'G-4 METERS'
# If not, replace 'Month', 'G-1.6 METERS', and 'G-4 METERS' with your actual column names

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']]  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)']  # Target variable (demand for G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']]  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)']  # Target variable (demand for G-4 meters)

# Train-Test Split for G-1.6 METERS (if needed for validation)
# X_train_1_6, X_test_1_6, y_train_1_6, y_test_1_6 = train_test_split(X_1_6, y_1_6, test_size=0.2, random_state=42)

# Train-Test Split for G-4 METERS (if needed for validation)
# X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(X_4, y_4, test_size=0.2, random_state=42)

# Random Forest Regression Model for G-1.6 METERS
rf_regressor_1_6 = RandomForestRegressor(n_estimators=100, random_state=42)  # You can adjust the number of trees (n_estimators)
rf_regressor_1_6.fit(X_1_6, y_1_6)

# Random Forest Regression Model for G-4 METERS
rf_regressor_4 = RandomForestRegressor(n_estimators=100, random_state=42)  # You can adjust the number of trees (n_estimators)
rf_regressor_4.fit(X_4, y_4)

# Future demand forecasting for G-1.6 METERS
future_months_1_6 = pd.date_range(start='2023-01-01', periods=2, freq='M')
future_months_numeric_1_6 = (future_months_1_6 - pd.to_datetime('2023-01-01')).days
predicted_demand_1_6 = rf_regressor_1_6.predict(future_months_numeric_1_6.values.reshape(-1, 1))
future_demand_1_6 = pd.DataFrame({'Month': future_months_1_6, 'Predicted Demand (G-1.6)': predicted_demand_1_6})
print(future_demand_1_6)

# Future demand forecasting for G-4 METERS
future_months_4 = pd.date_range(start='2023-08-01', periods=2, freq='M')
future_months_numeric_4 = (future_months_4 - pd.to_datetime('2023-01-01')).days
predicted_demand_4 = rf_regressor_4.predict(future_months_numeric_4.values.reshape(-1, 1))
future_demand_4 = pd.DataFrame({'Month': future_months_4, 'Predicted Demand (G-4)': predicted_demand_4})
print(future_demand_4)


# Next 15 days

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# Assuming 'monthly_df' is your DataFrame containing demand data
# Assuming 'monthly_df' has columns 'Month', 'G-1.6 METERS', and 'G-4 METERS'
# If not, replace 'Month', 'G-1.6 METERS', and 'G-4 METERS' with your actual column names

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']]  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)']  # Target variable (demand for G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']]  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)']  # Target variable (demand for G-4 meters)

# Random Forest Regression Model for G-1.6 METERS
rf_regressor_1_6 = RandomForestRegressor(n_estimators=100, random_state=42)  # You can adjust the number of trees (n_estimators)
rf_regressor_1_6.fit(X_1_6, y_1_6)

# Random Forest Regression Model for G-4 METERS
rf_regressor_4 = RandomForestRegressor(n_estimators=100, random_state=42)  # You can adjust the number of trees (n_estimators)
rf_regressor_4.fit(X_4, y_4)

# Function to predict production for a given start date and number of days
def predict_daily_production(model, start_date, n_days, working_days_per_month=25):
    # Predict monthly production
    predicted_demand_monthly = model.predict([[12]])  # Assuming predicting for December (you can adjust this)
    
    # Calculate daily rate based on working days
    daily_rate = predicted_demand_monthly / working_days_per_month
    
    # Scale to desired number of days
    predicted_demand = daily_rate * n_days
    
    return predicted_demand

# Predict total daily production for G-1.6 METERS for the next 15 days
predicted_demand_1_6 = predict_daily_production(rf_regressor_1_6, '2020-06-01', 15)
print("Predicted Demand (G-1.6) for the next 15 days:", predicted_demand_1_6)

# Predict total daily production for G-4 METERS for the next 15 days
predicted_demand_4 = predict_daily_production(rf_regressor_4, '2023-08-01', 15)
print("Predicted Demand (G-4) for the next 15 days:", predicted_demand_4)


# Next 3 Month

In [15]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# Assuming 'monthly_df' is your DataFrame containing demand data
# Assuming 'monthly_df' has columns 'Month', 'G-1.6 METERS', and 'G-4 METERS'
# If not, replace 'Month', 'G-1.6 METERS', and 'G-4 METERS' with your actual column names

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']]  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)']  # Target variable (demand for G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']]  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)']  # Target variable (demand for G-4 meters)

# Random Forest Regression Model for G-1.6 METERS
rf_regressor_1_6 = RandomForestRegressor(n_estimators=100, random_state=42)  # You can adjust the number of trees (n_estimators)
rf_regressor_1_6.fit(X_1_6, y_1_6)

# Random Forest Regression Model for G-4 METERS
rf_regressor_4 = RandomForestRegressor(n_estimators=100, random_state=42)  # You can adjust the number of trees (n_estimators)
rf_regressor_4.fit(X_4, y_4)

# Function to predict production for a given start date and time period
def predict_production_by_period(model, start_date, period, working_days_per_month=25):
    if period.endswith('days'):
        n_days = int(period.split()[0])  # Extract the number of days
        # Predict monthly production
        predicted_demand_monthly = model.predict([[12]])  # Assuming predicting for December (you can adjust this)
        # Calculate daily rate based on working days
        daily_rate = predicted_demand_monthly / working_days_per_month
        # Scale to desired number of days
        predicted_demand = daily_rate * n_days
        return predicted_demand
    elif period.endswith('months'):
        n_months = int(period.split()[0])  # Extract the number of months
        # Predict monthly production
        predicted_demand_monthly = model.predict([[12]])  # Assuming predicting for December (you can adjust this)
        # Scale to desired number of months
        predicted_demand = predicted_demand_monthly * n_months
        return predicted_demand
    elif period.endswith('years'):
        n_years = int(period.split()[0])  # Extract the number of years
        # Predict monthly production
        predicted_demand_monthly = model.predict([[12]])  # Assuming predicting for December (you can adjust this)
        # Scale to desired number of years
        predicted_demand = predicted_demand_monthly * 12 * n_years  # Assuming 12 months per year
        return predicted_demand
    else:
        raise ValueError("Unsupported time period. Please specify time in days, months, or years.")

# Example usage:
try:
    period = input("Enter time period (e.g., '15 days', '3 months', '2 years'): ")
    predicted_demand_1_6 = predict_production_by_period(rf_regressor_1_6, '2023-08-01', period)
    print("Predicted Demand (G-1.6):", predicted_demand_1_6)
    
    predicted_demand_4 = predict_production_by_period(rf_regressor_4, '2023-08-01', period)
    print("Predicted Demand (G-4):", predicted_demand_4)
except ValueError as e:
    print(e)


Enter time period (e.g., '15 days', '3 months', '2 years'): 15 days
Predicted Demand (G-1.6): [147.65]
Predicted Demand (G-4): [0.]




# LSTM

In [None]:
pip install tensorflow

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_absolute_error, r2_score

# Assuming 'monthly_df' is your DataFrame containing demand data
# Assuming 'monthly_df' has columns 'Month', 'Monthly Production (G-1.6)', and 'Monthly Production (G-4)'

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']].values  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)'].values  # Target variable (monthly production of G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']].values  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)'].values  # Target variable (monthly production of G-4 meters)

# Train-Test Split for G-1.6 METERS
X_train_1_6, X_test_1_6, y_train_1_6, y_test_1_6 = train_test_split(X_1_6, y_1_6, test_size=0.2, random_state=42)

# Train-Test Split for G-4 METERS
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(X_4, y_4, test_size=0.2, random_state=42)

# Normalize the data
scaler_1_6 = MinMaxScaler()
X_train_scaled_1_6 = scaler_1_6.fit_transform(X_train_1_6)
X_test_scaled_1_6 = scaler_1_6.transform(X_test_1_6)

scaler_4 = MinMaxScaler()
X_train_scaled_4 = scaler_4.fit_transform(X_train_4)
X_test_scaled_4 = scaler_4.transform(X_test_4)

# Reshape the data for LSTM input (samples, time steps, features)
n_samples_train_1_6, n_features = X_train_scaled_1_6.shape
n_samples_test_1_6, _ = X_test_scaled_1_6.shape

X_train_reshaped_1_6 = X_train_scaled_1_6.reshape((n_samples_train_1_6, 1, n_features))
X_test_reshaped_1_6 = X_test_scaled_1_6.reshape((n_samples_test_1_6, 1, n_features))

n_samples_train_4, _ = X_train_scaled_4.shape
n_samples_test_4, _ = X_test_scaled_4.shape

X_train_reshaped_4 = X_train_scaled_4.reshape((n_samples_train_4, 1, n_features))
X_test_reshaped_4 = X_test_scaled_4.reshape((n_samples_test_4, 1, n_features))

# Build LSTM model for G-1.6 METERS
model_1_6 = Sequential([
    LSTM(units=50, activation='relu', input_shape=(1, n_features)),
    Dense(units=1)
])
model_1_6.compile(optimizer='adam', loss='mse')

# Build LSTM model for G-4 METERS
model_4 = Sequential([
    LSTM(units=50, activation='relu', input_shape=(1, n_features)),
    Dense(units=1)
])
model_4.compile(optimizer='adam', loss='mse')

# Train the LSTM models
model_1_6.fit(X_train_reshaped_1_6, y_train_1_6, epochs=100, batch_size=32, verbose=1)
model_4.fit(X_train_reshaped_4, y_train_4, epochs=100, batch_size=32, verbose=1)

# Predictions for G-1.6 METERS
y_pred_1_6 = model_1_6.predict(X_test_reshaped_1_6)

# Predictions for G-4 METERS
y_pred_4 = model_4.predict(X_test_reshaped_4)

# Model Evaluation for G-1.6 METERS
mae_1_6 = mean_absolute_error(y_test_1_6, y_pred_1_6)
print("Mean Absolute Error for G-1.6 METERS:", mae_1_6)

r2_1_6 = r2_score(y_test_1_6, y_pred_1_6)
print("R-squared for G-1.6 METERS:", r2_1_6)

# Model Evaluation for G-4 METERS
mae_4 = mean_absolute_error(y_test_4, y_pred_4)
print("Mean Absolute Error for G-4 METERS:", mae_4)

r2_4 = r2_score(y_test_4, y_pred_4)
print("R-squared for G-4 METERS:", r2_4)


# Neural Network

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import mean_absolute_error, r2_score

# Assuming 'monthly_df' is your DataFrame containing demand data
# Assuming 'monthly_df' has columns 'Month', 'Monthly Production (G-1.6)', and 'Monthly Production (G-4)'

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']].values  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)'].values  # Target variable (monthly production of G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']].values  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)'].values  # Target variable (monthly production of G-4 meters)

# Train-Test Split for G-1.6 METERS
X_train_1_6, X_test_1_6, y_train_1_6, y_test_1_6 = train_test_split(X_1_6, y_1_6, test_size=0.2, random_state=42)

# Train-Test Split for G-4 METERS
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(X_4, y_4, test_size=0.2, random_state=42)

# Normalize the data
scaler_1_6 = MinMaxScaler()
X_train_scaled_1_6 = scaler_1_6.fit_transform(X_train_1_6)
X_test_scaled_1_6 = scaler_1_6.transform(X_test_1_6)

scaler_4 = MinMaxScaler()
X_train_scaled_4 = scaler_4.fit_transform(X_train_4)
X_test_scaled_4 = scaler_4.transform(X_test_4)

# Build Neural Network model for G-1.6 METERS
model_1_6 = Sequential([
    Dense(units=64, activation='relu', input_shape=(X_train_scaled_1_6.shape[1],)),
    Dense(units=32, activation='relu'),
    Dense(units=1)
])
model_1_6.compile(optimizer='adam', loss='mse')

# Build Neural Network model for G-4 METERS
model_4 = Sequential([
    Dense(units=64, activation='relu', input_shape=(X_train_scaled_4.shape[1],)),
    Dense(units=32, activation='relu'),
    Dense(units=1)
])
model_4.compile(optimizer='adam', loss='mse')

# Train the Neural Network models
model_1_6.fit(X_train_scaled_1_6, y_train_1_6, epochs=100, batch_size=32, verbose=1)
model_4.fit(X_train_scaled_4, y_train_4, epochs=100, batch_size=32, verbose=1)

# Predictions for G-1.6 METERS
y_pred_1_6 = model_1_6.predict(X_test_scaled_1_6)

# Predictions for G-4 METERS
y_pred_4 = model_4.predict(X_test_scaled_4)

# Model Evaluation for G-1.6 METERS
mae_1_6 = mean_absolute_error(y_test_1_6, y_pred_1_6)
print("Mean Absolute Error for G-1.6 METERS:", mae_1_6)

r2_1_6 = r2_score(y_test_1_6, y_pred_1_6)
print("R-squared for G-1.6 METERS:", r2_1_6)

# Model Evaluation for G-4 METERS
mae_4 = mean_absolute_error(y_test_4, y_pred_4)
print("Mean Absolute Error for G-4 METERS:", mae_4)

r2_4 = r2_score(y_test_4, y_pred_4)
print("R-squared for G-4 METERS:", r2_4)


# KNN

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler

# Assuming 'monthly_df' is your DataFrame containing demand data
# Assuming 'monthly_df' has columns 'Month' and 'Monthly Production (G-1.6)' and 'Monthly Production (G-4)'
# If not, replace 'Month' and 'Monthly Production (G-1.6)' and 'Monthly Production (G-4)' with your actual column names

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']]  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)']  # Target variable (monthly production of G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']]  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)']  # Target variable (monthly production of G-4 meters)

# Train-Test Split for G-1.6 METERS
X_train_1_6, X_test_1_6, y_train_1_6, y_test_1_6 = train_test_split(X_1_6, y_1_6, test_size=0.2, random_state=42)

# Train-Test Split for G-4 METERS
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(X_4, y_4, test_size=0.2, random_state=42)

# Standardize the data
scaler_1_6 = StandardScaler()
X_train_scaled_1_6 = scaler_1_6.fit_transform(X_train_1_6)
X_test_scaled_1_6 = scaler_1_6.transform(X_test_1_6)

scaler_4 = StandardScaler()
X_train_scaled_4 = scaler_4.fit_transform(X_train_4)
X_test_scaled_4 = scaler_4.transform(X_test_4)

# KNN Regression Model for G-1.6 METERS
knn_regressor_1_6 = KNeighborsRegressor(n_neighbors=5)  # You can adjust the number of neighbors (n_neighbors)
knn_regressor_1_6.fit(X_train_scaled_1_6, y_train_1_6)

# KNN Regression Model for G-4 METERS
knn_regressor_4 = KNeighborsRegressor(n_neighbors=5)  # You can adjust the number of neighbors (n_neighbors)
knn_regressor_4.fit(X_train_scaled_4, y_train_4)

# Predictions for G-1.6 METERS
y_pred_1_6 = knn_regressor_1_6.predict(X_test_scaled_1_6)

# Predictions for G-4 METERS
y_pred_4 = knn_regressor_4.predict(X_test_scaled_4)

# Model Evaluation for G-1.6 METERS
mae_1_6 = mean_absolute_error(y_test_1_6, y_pred_1_6)
print("Mean Absolute Error for G-1.6 METERS:", mae_1_6)

# R-squared for G-1.6 METERS
r2_1_6 = r2_score(y_test_1_6, y_pred_1_6)
print("R-squared for G-1.6 METERS:", r2_1_6)

# Model Evaluation for G-4 METERS
mae_4 = mean_absolute_error(y_test_4, y_pred_4)
print("Mean Absolute Error for G-4 METERS:", mae_4)

# R-squared for G-4 METERS
r2_4 = r2_score(y_test_4, y_pred_4)
print("R-squared for G-4 METERS:", r2_4)


# Demand Forecasting

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score

# Assuming 'monthly_df' is your DataFrame containing demand data
# Assuming 'monthly_df' has columns 'Month', 'G-1.6 METERS', and 'G-4 METERS'
# If not, replace 'Month', 'G-1.6 METERS', and 'G-4 METERS' with your actual column names

# Data Preparation for G-1.6 METERS
X_1_6 = monthly_df[['Month']]  # Features (only using month)
y_1_6 = monthly_df['Monthly Production (G-1.6)']  # Target variable (demand for G-1.6 meters)

# Data Preparation for G-4 METERS
X_4 = monthly_df[['Month']]  # Features (only using month)
y_4 = monthly_df['Monthly Production (G-4)']  # Target variable (demand for G-4 meters)

# Train-Test Split for G-1.6 METERS
X_train_1_6, X_test_1_6, y_train_1_6, y_test_1_6 = train_test_split(X_1_6, y_1_6, test_size=0.2, random_state=42)

# Train-Test Split for G-4 METERS
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(X_4, y_4, test_size=0.2, random_state=42)

# Standardize the data
scaler_1_6 = StandardScaler()
X_train_scaled_1_6 = scaler_1_6.fit_transform(X_train_1_6)
X_test_scaled_1_6 = scaler_1_6.transform(X_test_1_6)

scaler_4 = StandardScaler()
X_train_scaled_4 = scaler_4.fit_transform(X_train_4)
X_test_scaled_4 = scaler_4.transform(X_test_4)

# KNN Regression Model for G-1.6 METERS
knn_regressor_1_6 = KNeighborsRegressor(n_neighbors=5)  # You can adjust the number of neighbors (n_neighbors)
knn_regressor_1_6.fit(X_train_scaled_1_6, y_train_1_6)

# KNN Regression Model for G-4 METERS
knn_regressor_4 = KNeighborsRegressor(n_neighbors=5)  # You can adjust the number of neighbors (n_neighbors)
knn_regressor_4.fit(X_train_scaled_4, y_train_4)

# Predictions for G-1.6 METERS
y_pred_1_6 = knn_regressor_1_6.predict(X_test_scaled_1_6)

# Predictions for G-4 METERS
y_pred_4 = knn_regressor_4.predict(X_test_scaled_4)

# Calculate MAE and R2 for G-1.6 METERS
mae_1_6 = mean_absolute_error(y_test_1_6, y_pred_1_6)
r2_1_6 = r2_score(y_test_1_6, y_pred_1_6)
print("MAE for G-1.6 METERS:", mae_1_6)
print("R-squared for G-1.6 METERS:", r2_1_6)

# Calculate MAE and R2 for G-4 METERS
mae_4 = mean_absolute_error(y_test_4, y_pred_4)
r2_4 = r2_score(y_test_4, y_pred_4)
print("MAE for G-4 METERS:", mae_4)
print("R-squared for G-4 METERS:", r2_4)

# Future demand forecasting for G-1.6 METERS
future_months_1_6 = pd.date_range(start='2023-08-01', periods=2, freq='M')
future_months_numeric_1_6 = (future_months_1_6 - pd.to_datetime('2023-01-01')).days
predicted_demand_1_6 = knn_regressor_1_6.predict(scaler_1_6.transform(future_months_numeric_1_6.values.reshape(-1, 1)))
future_demand_1_6 = pd.DataFrame({'Month': future_months_1_6, 'Predicted Demand (G-1.6)': predicted_demand_1_6})
print(future_demand_1_6)

# Future demand forecasting for G-4 METERS
future_months_4 = pd.date_range(start='2023-08-01', periods=2, freq='M')
future_months_numeric_4 = (future_months_4 - pd.to_datetime('2023-01-01')).days
predicted_demand_4 = knn_regressor_4.predict(scaler_4.transform(future_months_numeric_4.values.reshape(-1, 1)))
future_demand_4 = pd.DataFrame({'Month': future_months_4, 'Predicted Demand (G-4)': predicted_demand_4})
print(future_demand_4)


In [None]:
import matplotlib.pyplot as plt

# Model names
models = ['Linear Regression', 'SVR', 'Random Forest', 'LSTM', 'Neural Network', 'KNN']

# MAE values for G-1.6 METERS
mae_values_1_6 = [2110.7434269599835, 4398.460241695796, 159.49702064897147, 11489.950789122455, 4480.88268862924, 326.7449852507375]

# R-squared values for G-1.6 METERS
r_squared_values_1_6 = [0.7881955798420623, 0.02910767790355706, 0.9929359503454024, -4.271457705220814, -0.005009137818440035, 0.9875118430126313]

# MAE values for G-4 METERS
mae_values_4 = [6270.315395072457, 11039.68087965169, 179.59642330383755, 13156.945746362737, 11129.414146760924, 367.7696165191741]

# R-squared values for G-4 METERS
r_squared_values_4 = [0.564752769368776, -0.016207630993164246, 0.9977018893694243, -0.9764812966112533, -0.2523992106001538, 0.9951468323875219]

# Create subplots for MAE
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.bar(models, mae_values_1_6, color='blue', label='G-1.6 METERS')
plt.xlabel('Models')
plt.ylabel('Mean Absolute Error (MAE)')
plt.title('MAE Comparison for G-1.6 METERS')
plt.legend()

plt.subplot(1, 2, 2)
plt.bar(models, mae_values_4, color='red', label='G-4 METERS')
plt.xlabel('Models')
plt.ylabel('Mean Absolute Error (MAE)')
plt.title('MAE Comparison for G-4 METERS')
plt.legend()

plt.tight_layout()

# Create subplots for R-squared
plt.figure(figsize=(12, 6))

plt.subplot(1, 2, 1)
plt.bar(models, r_squared_values_1_6, color='blue', label='G-1.6 METERS')
plt.xlabel('Models')
plt.ylabel('R-squared (R^2)')
plt.title('R-squared Comparison for G-1.6 METERS')
plt.legend()

plt.subplot(1, 2, 2)
plt.bar(models, r_squared_values_4, color='red', label='G-4 METERS')
plt.xlabel('Models')
plt.ylabel('R-squared (R^2)')
plt.title('R-squared Comparison for G-4 METERS')
plt.legend()

plt.tight_layout()

# Show the plots
plt.show()
