In [2]:
import pandas as pd

# Load the monthly production workbook and preview its first rows.
production_workbook = "expanded_monthly_production.xlsx"
df = pd.read_excel(production_workbook)
df.head()


Unnamed: 0,Month,G-1.6 Meters,G-4 Meters,Total,Monthly Production (G-1.6),Monthly Production (G-4),Monthly Total Production
0,1975-01-01,2953,0,2953,246.083333,0.0,246.083333
1,1975-02-01,2953,0,2953,246.083333,0.0,246.083333
2,1975-03-01,2953,0,2953,246.083333,0.0,246.083333
3,1975-04-01,2953,0,2953,246.083333,0.0,246.083333
4,1975-05-01,2953,0,2953,246.083333,0.0,246.083333


In [3]:
# Expand every monthly record into WORKING_DAYS_PER_MONTH equal daily records
# and save the result to Excel. Requires `df` from the loading cell.
WORKING_DAYS_PER_MONTH = 25  # each month's output is spread evenly over this many days

# Index by month. Rebinding (instead of inplace=True) keeps the cell re-runnable:
# on a second run 'Month' is already the index, so the call is skipped rather
# than raising KeyError.
if 'Month' in df.columns:
    df = df.set_index('Month')

# One [date, G-1.6 value, G-4 value] record per generated day
daily_data = []

for month, monthly_g16, monthly_g4 in zip(
        df.index,
        df['Monthly Production (G-1.6)'],
        df['Monthly Production (G-4)']):
    # Spread the monthly figures evenly over the working days
    daily_production_g16 = monthly_g16 / WORKING_DAYS_PER_MONTH
    daily_production_g4 = monthly_g4 / WORKING_DAYS_PER_MONTH

    # Consecutive calendar days starting on the first day of the month
    month_start = month - pd.DateOffset(days=month.day - 1)
    daily_dates = pd.date_range(start=month_start, periods=WORKING_DAYS_PER_MONTH, freq='D')

    daily_data.extend(
        [day, daily_production_g16, daily_production_g4] for day in daily_dates)

# Create a new DataFrame from the daily data
df_daily_25 = pd.DataFrame(
    daily_data,
    columns=['Date', 'Daily Production (G-1.6)', 'Daily Production (G-4)'])

# Every month must contribute exactly WORKING_DAYS_PER_MONTH rows
assert len(df_daily_25) == len(df) * WORKING_DAYS_PER_MONTH

# Print the resulting DataFrame (pandas truncates the display to head and tail)
print(df_daily_25)

# Save to Excel
output_file = 'Daily_production_new.xlsx'
df_daily_25.to_excel(output_file, index=False)

print("Excel file saved successfully at", output_file)

            Date  Daily Production (G-1.6)  Daily Production (G-4)
0     1975-01-01                  9.843333                0.000000
1     1975-01-02                  9.843333                0.000000
2     1975-01-03                  9.843333                0.000000
3     1975-01-04                  9.843333                0.000000
4     1975-01-05                  9.843333                0.000000
...          ...                       ...                     ...
14095 2021-12-21                948.866667              487.493333
14096 2021-12-22                948.866667              487.493333
14097 2021-12-23                948.866667              487.493333
14098 2021-12-24                948.866667              487.493333
14099 2021-12-25                948.866667              487.493333

[14100 rows x 3 columns]
Excel file saved successfully at Daily_production_new.xlsx


# Model training for G-1.6 & G-4

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error,mean_absolute_error

# Linear-regression baseline for G-1.6 daily production.
# Requires `df_daily_25` from the daily-expansion cell.

# Encode each date as its integer ordinal so it can serve as a numeric feature
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

X = df_daily_25[['Date_ordinal']]
y = df_daily_25['Daily Production (G-1.6)']

# NOTE(review): the split is shuffled, and all days of one month share a single
# value, so neighbouring days land on both sides of the split. The scores below
# therefore measure interpolation, not forecasting skill. Consider a
# chronological split (shuffle=False) - TODO confirm intent.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f'Mean Absolute Error: {mae}')
print(f'R-squared: {r2}')
print(f'Mean Squared Error: {mse}')

# Predict future production (freq='M' produces month-end dates)
future_dates = pd.date_range(start='2022-06-01', periods=5, freq='M')
future_dates_ordinal = future_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1)
# The model was fitted on a DataFrame with named columns; predicting from a bare
# array makes scikit-learn warn about missing feature names, so reuse the
# training column name(s) here.
future_features = pd.DataFrame(future_dates_ordinal, columns=X.columns)
future_production = model.predict(future_features)

# Print future predictions
print('Future Dates: ', future_dates)
print('Predicted Production (G-1.6): ', future_production)


Mean Absolute Error: 72.77530806664882
R-squared: 0.8061314637015778
Mean Squared Error: 8361.273002446213
Future Dates:  DatetimeIndex(['2022-06-30', '2022-07-31', '2022-08-31', '2022-09-30',
               '2022-10-31'],
              dtype='datetime64[ns]', freq='M')
Predicted Production (G-1.6):  [800.71447183 801.89517927 803.07588672 804.21850683 805.39921428]




In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error

# Linear-regression baselines for both meter types (G-1.6 and G-4).
# Requires `df_daily_25` from the daily-expansion cell.

# Encode each date as its integer ordinal so it can serve as a numeric feature
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

# One shared feature matrix, one target per meter type
X = df_daily_25[['Date_ordinal']]
y_g16 = df_daily_25['Daily Production (G-1.6)']
y_g4 = df_daily_25['Daily Production (G-4)']

# A single call splits the features and both targets on the same rows.
# NOTE(review): the split is shuffled, and all days of one month share a single
# value, so the scores below measure interpolation, not forecasting skill.
# Consider a chronological split (shuffle=False) - TODO confirm intent.
X_train, X_test, y_train_g16, y_test_g16, y_train_g4, y_test_g4 = train_test_split(
    X, y_g16, y_g4, test_size=0.2, random_state=42)

# Train the linear regression models for both meters
model_g16 = LinearRegression()
model_g4 = LinearRegression()
model_g16.fit(X_train, y_train_g16)
model_g4.fit(X_train, y_train_g4)

# Predict on the test set for both meters
y_pred_g16 = model_g16.predict(X_test)
y_pred_g4 = model_g4.predict(X_test)

# Pool actual values and predictions of both meters in the same order (G-1.6
# first, then G-4). The metrics compare by position, so the differing indexes
# of the two Series are irrelevant.
y_test_combined = pd.concat([y_test_g16, y_test_g4])
y_pred_combined = pd.concat([pd.Series(y_pred_g16), pd.Series(y_pred_g4)])

# Evaluate the combined model
r2_combined = r2_score(y_test_combined, y_pred_combined)
mae_combined = mean_absolute_error(y_test_combined, y_pred_combined)

print(f'Combined R-squared: {r2_combined}')
print(f'Combined Mean Absolute Error: {mae_combined}')

# Predict future production for both meters (freq='M' produces month-end dates)
future_dates = pd.date_range(start='2023-06-01', periods=5, freq='M')
future_dates_ordinal = future_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1)
# The models were fitted on a DataFrame with named columns; predicting from a
# bare array makes scikit-learn warn about missing feature names.
future_features = pd.DataFrame(future_dates_ordinal, columns=X.columns)
future_production_g16 = model_g16.predict(future_features)
future_production_g4 = model_g4.predict(future_features)

# Print future predictions for both meters
print('Future Dates: ', future_dates)
print('G-1.6 - Predicted Production: ', future_production_g16)
print('G-4 - Predicted Production: ', future_production_g4)


Combined R-squared: 0.6911060173482821
Combined Mean Absolute Error: 160.32217258056272
Future Dates:  DatetimeIndex(['2023-06-30', '2023-07-31', '2023-08-31', '2023-09-30',
               '2023-10-31'],
              dtype='datetime64[ns]', freq='M')
G-1.6 - Predicted Production:  [814.61634985 815.7970573  816.97776475 818.12038486 819.30109231]
G-4 - Predicted Production:  [1468.16673636 1471.05264839 1473.93856041 1476.7313785  1479.61729053]




In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

# Random-forest model for G-1.6 daily production.
# Requires `df_daily_25` from the daily-expansion cell.

# Encode each date as its integer ordinal so it can serve as a numeric feature
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

X = df_daily_25[['Date_ordinal']]
y = df_daily_25['Daily Production (G-1.6)']

# NOTE(review): the split is shuffled, and all days of one month share a single
# value, so almost every test day has an identical-valued neighbour in the
# training set. The near-perfect score below reflects that leakage, not
# forecasting skill. Consider a chronological split - TODO confirm intent.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest regression model
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f'R-squared: {r2}')
print(f'Mean Squared Error: {mse}')

# Predict future production (freq='M' produces month-end dates).
# NOTE(review): the recorded output repeats the last observed daily value for
# every future date - this model does not extrapolate a trend.
future_dates = pd.date_range(start='2022-06-01', periods=5, freq='M')
future_dates_ordinal = future_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1)
# The model was fitted on a DataFrame with named columns; predicting from a bare
# array makes scikit-learn warn about missing feature names.
future_features = pd.DataFrame(future_dates_ordinal, columns=X.columns)
future_production = model.predict(future_features)

# Print future predictions
print('Future Dates: ', future_dates)
print('Predicted Production (G-1.6): ', future_production)


R-squared: 0.9999997360307109
Mean Squared Error: 0.011384618321513147
Future Dates:  DatetimeIndex(['2022-06-30', '2022-07-31', '2022-08-31', '2022-09-30',
               '2022-10-31'],
              dtype='datetime64[ns]', freq='M')
Predicted Production (G-1.6):  [948.86666667 948.86666667 948.86666667 948.86666667 948.86666667]




In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, mean_squared_error

# K-nearest-neighbours model for G-1.6 daily production.
# Requires `df_daily_25` from the daily-expansion cell.

# Encode each date as its integer ordinal so it can serve as a numeric feature
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

X = df_daily_25[['Date_ordinal']]
y = df_daily_25['Daily Production (G-1.6)']

# NOTE(review): the split is shuffled, and all days of one month share a single
# value, so the nearest neighbours of a test day are usually same-month training
# days. The near-perfect score below reflects that leakage, not forecasting
# skill. Consider a chronological split - TODO confirm intent.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the KNN regression model
model = KNeighborsRegressor(n_neighbors=5)  # Example: using 5 nearest neighbors
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f'R-squared: {r2}')
print(f'Mean Squared Error: {mse}')

# Predict future production (freq='M' produces month-end dates).
# NOTE(review): the recorded output repeats the last observed daily value for
# every future date - this model does not extrapolate a trend.
future_dates = pd.date_range(start='2023-06-01', periods=5, freq='M')
future_dates_ordinal = future_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1)
# The model was fitted on a DataFrame with named columns; predicting from a bare
# array makes scikit-learn warn about missing feature names.
future_features = pd.DataFrame(future_dates_ordinal, columns=X.columns)
future_production = model.predict(future_features)

# Print future predictions
print('Future Dates: ', future_dates)
print('Predicted Production (G-1.6): ', future_production)


R-squared: 0.9999823069307295
Mean Squared Error: 0.7630768006304174
Future Dates:  DatetimeIndex(['2023-06-30', '2023-07-31', '2023-08-31', '2023-09-30',
               '2023-10-31'],
              dtype='datetime64[ns]', freq='M')
Predicted Production (G-1.6):  [948.86666667 948.86666667 948.86666667 948.86666667 948.86666667]




In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score, mean_absolute_error

# K-nearest-neighbours models for both meter types (G-1.6 and G-4).
# Requires `df_daily_25` from the daily-expansion cell.

# Encode each date as its integer ordinal so it can serve as a numeric feature
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

# One shared feature matrix, one target per meter type
X = df_daily_25[['Date_ordinal']]
y_g16 = df_daily_25['Daily Production (G-1.6)']
y_g4 = df_daily_25['Daily Production (G-4)']

# A single call splits the features and both targets on the same rows.
# NOTE(review): the split is shuffled, and all days of one month share a single
# value, so the near-perfect scores below reflect leakage between neighbouring
# days, not forecasting skill. Consider a chronological split - TODO confirm.
X_train, X_test, y_train_g16, y_test_g16, y_train_g4, y_test_g4 = train_test_split(
    X, y_g16, y_g4, test_size=0.2, random_state=42)

# Train the KNN regression models for both meters
model_g16 = KNeighborsRegressor(n_neighbors=5)
model_g4 = KNeighborsRegressor(n_neighbors=5)
model_g16.fit(X_train, y_train_g16)
model_g4.fit(X_train, y_train_g4)

# Predict on the test set for both meters
y_pred_g16 = model_g16.predict(X_test)
y_pred_g4 = model_g4.predict(X_test)

# Pool actual values and predictions of both meters in the same order (G-1.6
# first, then G-4). The metrics compare by position, so the differing indexes
# of the two Series are irrelevant.
y_test_combined = pd.concat([y_test_g16, y_test_g4])
y_pred_combined = pd.concat([pd.Series(y_pred_g16), pd.Series(y_pred_g4)])

# Evaluate the combined model
r2_combined = r2_score(y_test_combined, y_pred_combined)
mae_combined = mean_absolute_error(y_test_combined, y_pred_combined)

print(f'Combined R-squared: {r2_combined}')
print(f'Combined Mean Absolute Error: {mae_combined}')

# Predict future production for both meters (freq='M' produces month-end dates)
future_dates = pd.date_range(start='2024-06-01', periods=5, freq='M')
future_dates_ordinal = future_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1)
# The models were fitted on a DataFrame with named columns; predicting from a
# bare array makes scikit-learn warn about missing feature names.
future_features = pd.DataFrame(future_dates_ordinal, columns=X.columns)
future_production_g16 = model_g16.predict(future_features)
future_production_g4 = model_g4.predict(future_features)

# Print future predictions for both meters
print('Future Dates: ', future_dates)
print('G-1.6 - Predicted Production: ', future_production_g16)
print('G-4 - Predicted Production: ', future_production_g4)


Combined R-squared: 0.9999721469811006
Combined Mean Absolute Error: 0.06261666666667094
Future Dates:  DatetimeIndex(['2024-06-30', '2024-07-31', '2024-08-31', '2024-09-30',
               '2024-10-31'],
              dtype='datetime64[ns]', freq='M')
G-1.6 - Predicted Production:  [948.86666667 948.86666667 948.86666667 948.86666667 948.86666667]
G-4 - Predicted Production:  [487.49333333 487.49333333 487.49333333 487.49333333 487.49333333]




In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_absolute_error

# Random-forest models for both meter types (G-1.6 and G-4).
# Requires `df_daily_25` from the daily-expansion cell.

# Encode each date as its integer ordinal so it can serve as a numeric feature
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

# One shared feature matrix, one target per meter type
X = df_daily_25[['Date_ordinal']]
y_g16 = df_daily_25['Daily Production (G-1.6)']
y_g4 = df_daily_25['Daily Production (G-4)']

# A single call splits the features and both targets on the same rows.
# NOTE(review): the split is shuffled, and all days of one month share a single
# value, so the near-perfect scores below reflect leakage between neighbouring
# days, not forecasting skill. Consider a chronological split - TODO confirm.
X_train, X_test, y_train_g16, y_test_g16, y_train_g4, y_test_g4 = train_test_split(
    X, y_g16, y_g4, test_size=0.2, random_state=42)

# Train the Random Forest regression models for both meters
model_g16 = RandomForestRegressor(random_state=42)
model_g4 = RandomForestRegressor(random_state=42)
model_g16.fit(X_train, y_train_g16)
model_g4.fit(X_train, y_train_g4)

# Predict on the test set for both meters
y_pred_g16 = model_g16.predict(X_test)
y_pred_g4 = model_g4.predict(X_test)

# Pool actual values and predictions of both meters in the same order (G-1.6
# first, then G-4). The metrics compare by position, so the differing indexes
# of the two Series are irrelevant.
y_test_combined = pd.concat([y_test_g16, y_test_g4])
y_pred_combined = pd.concat([pd.Series(y_pred_g16), pd.Series(y_pred_g4)])

# Evaluate the combined model
r2_combined = r2_score(y_test_combined, y_pred_combined)
mae_combined = mean_absolute_error(y_test_combined, y_pred_combined)

print(f'Combined R-squared: {r2_combined}')
print(f'Combined Mean Absolute Error: {mae_combined}')

# Predict future production for both meters (freq='M' produces month-end dates)
future_dates = pd.date_range(start='2024-06-01', periods=5, freq='M')
future_dates_ordinal = future_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1)
# The models were fitted on a DataFrame with named columns; predicting from a
# bare array makes scikit-learn warn about missing feature names.
future_features = pd.DataFrame(future_dates_ordinal, columns=X.columns)
future_production_g16 = model_g16.predict(future_features)
future_production_g4 = model_g4.predict(future_features)

# Print future predictions for both meters
print('Future Dates: ', future_dates)
print('G-1.6 - Predicted Production: ', future_production_g16)
print('G-4 - Predicted Production: ', future_production_g4)


Combined R-squared: 0.9999998606505591
Combined Mean Absolute Error: 0.006682340425976219
Future Dates:  DatetimeIndex(['2024-06-30', '2024-07-31', '2024-08-31', '2024-09-30',
               '2024-10-31'],
              dtype='datetime64[ns]', freq='M')
G-1.6 - Predicted Production:  [948.86666667 948.86666667 948.86666667 948.86666667 948.86666667]
G-4 - Predicted Production:  [487.49333333 487.49333333 487.49333333 487.49333333 487.49333333]




In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_squared_error

# Support-vector regression (RBF kernel) for G-1.6 daily production.
# Requires `df_daily_25` from the daily-expansion cell.

# Encode each date as its integer ordinal so it can serve as a numeric feature
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

X = df_daily_25[['Date_ordinal']]
y = df_daily_25['Daily Production (G-1.6)']

# NOTE(review): the split is shuffled, and all days of one month share a single
# value, so the scores below measure interpolation, not forecasting skill.
# Consider a chronological split (shuffle=False) - TODO confirm intent.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the SVR model
model = SVR(kernel='rbf', C=1.0, epsilon=0.1)  # Example parameters, you may need to tune these
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f'R-squared: {r2}')
print(f'Mean Squared Error: {mse}')

# Predict future production for five consecutive days
future_dates = pd.date_range(start='2024-06-01', periods=5, freq='D')
future_dates_ordinal = future_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1)
# The model was fitted on a DataFrame with named columns; predicting from a bare
# array makes scikit-learn warn about missing feature names.
future_features = pd.DataFrame(future_dates_ordinal, columns=X.columns)
future_production = model.predict(future_features)

# Print future predictions
print('Future Dates: ', future_dates)
print('Predicted Production (G-1.6): ', future_production)


R-squared: 0.8205894808595434
Mean Squared Error: 7737.719377706823
Future Dates:  DatetimeIndex(['2024-06-01', '2024-06-02', '2024-06-03', '2024-06-04',
               '2024-06-05'],
              dtype='datetime64[ns]', freq='D')
Predicted Production (G-1.6):  [693.24031544 693.19934597 693.1583689  693.11738422 693.07639196]




In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_absolute_error

# Support-vector regression (RBF kernel) for both meter types (G-1.6 and G-4).
# Requires `df_daily_25` from the daily-expansion cell.

# Encode each date as its integer ordinal so it can serve as a numeric feature
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

# One shared feature matrix, one target per meter type
X = df_daily_25[['Date_ordinal']]
y_g16 = df_daily_25['Daily Production (G-1.6)']
y_g4 = df_daily_25['Daily Production (G-4)']

# A single call splits the features and both targets on the same rows.
# NOTE(review): the split is shuffled, and all days of one month share a single
# value, so the scores below measure interpolation, not forecasting skill.
# Consider a chronological split (shuffle=False) - TODO confirm intent.
X_train, X_test, y_train_g16, y_test_g16, y_train_g4, y_test_g4 = train_test_split(
    X, y_g16, y_g4, test_size=0.2, random_state=42)

# Train the SVR models for both meters
model_g16 = SVR(kernel='rbf', C=1.0, epsilon=0.1)
model_g4 = SVR(kernel='rbf', C=1.0, epsilon=0.1)
model_g16.fit(X_train, y_train_g16)
model_g4.fit(X_train, y_train_g4)

# Predict on the test set for both meters
y_pred_g16 = model_g16.predict(X_test)
y_pred_g4 = model_g4.predict(X_test)

# Pool actual values and predictions of both meters in the same order (G-1.6
# first, then G-4). The metrics compare by position, so the differing indexes
# of the two Series are irrelevant.
y_test_combined = pd.concat([y_test_g16, y_test_g4])
y_pred_combined = pd.concat([pd.Series(y_pred_g16), pd.Series(y_pred_g4)])

# Evaluate the combined model
r2_combined = r2_score(y_test_combined, y_pred_combined)
mae_combined = mean_absolute_error(y_test_combined, y_pred_combined)

print(f'Combined R-squared: {r2_combined}')
print(f'Combined Mean Absolute Error: {mae_combined}')

# Predict future production for both meters for five consecutive days
future_dates = pd.date_range(start='2024-08-01', periods=5, freq='D')
future_dates_ordinal = future_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1)
# The models were fitted on a DataFrame with named columns; predicting from a
# bare array makes scikit-learn warn about missing feature names.
future_features = pd.DataFrame(future_dates_ordinal, columns=X.columns)
future_production_g16 = model_g16.predict(future_features)
future_production_g4 = model_g4.predict(future_features)

# Print future predictions for both meters
print('Future Dates: ', future_dates)
print('G-1.6 - Predicted Production: ', future_production_g16)
print('G-4 - Predicted Production: ', future_production_g4)



Combined R-squared: 0.8616035808626418
Combined Mean Absolute Error: 105.8382666541378
Future Dates:  DatetimeIndex(['2024-08-01', '2024-08-02', '2024-08-03', '2024-08-04',
               '2024-08-05'],
              dtype='datetime64[ns]', freq='D')
G-1.6 - Predicted Production:  [690.7275482  690.6861295  690.6447037  690.60327079 690.56183078]
G-4 - Predicted Production:  [575.14751608 575.00800865 574.86855515 574.7291556  574.58981001]




In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error

# Support-vector regression on a standardized date feature, both meter types.
# Requires `df_daily_25` from the daily-expansion cell.

# Encode each date as its integer ordinal so it can serve as a numeric feature
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

# One shared feature matrix, one target per meter type
X = df_daily_25[['Date_ordinal']]
y_g16 = df_daily_25['Daily Production (G-1.6)']
y_g4 = df_daily_25['Daily Production (G-4)']

# A single call splits the features and both targets on the same rows.
# NOTE(review): the split is shuffled, and all days of one month share a single
# value, so the scores below measure interpolation, not forecasting skill.
# Consider a chronological split (shuffle=False) - TODO confirm intent.
X_train, X_test, y_train_g16, y_test_g16, y_train_g4, y_test_g4 = train_test_split(
    X, y_g16, y_g4, test_size=0.2, random_state=42)

# Standardize the feature using statistics of the training rows only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the SVR models for both meters
model_g16 = SVR(kernel='rbf', C=1.0, epsilon=0.1)
model_g4 = SVR(kernel='rbf', C=1.0, epsilon=0.1)
model_g16.fit(X_train_scaled, y_train_g16)
model_g4.fit(X_train_scaled, y_train_g4)

# Predict on the test set for both meters
y_pred_g16 = model_g16.predict(X_test_scaled)
y_pred_g4 = model_g4.predict(X_test_scaled)

# Pool actual values and predictions of both meters in the same order (G-1.6
# first, then G-4). The metrics compare by position, so the differing indexes
# of the two Series are irrelevant.
y_test_combined = pd.concat([y_test_g16, y_test_g4])
y_pred_combined = pd.concat([pd.Series(y_pred_g16), pd.Series(y_pred_g4)])

# Evaluate the combined model
r2_combined = r2_score(y_test_combined, y_pred_combined)
mae_combined = mean_absolute_error(y_test_combined, y_pred_combined)

print(f'Combined R-squared: {r2_combined}')
print(f'Combined Mean Absolute Error: {mae_combined}')

# Predict future production for both meters for five consecutive days
future_dates = pd.date_range(start='2024-08-01', periods=5, freq='D')
future_dates_ordinal = future_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1)
# The scaler was fitted on a DataFrame with named columns; transforming a bare
# array makes scikit-learn warn about missing feature names, so reuse the
# training column name(s) here.
future_dates_scaled = scaler.transform(pd.DataFrame(future_dates_ordinal, columns=X.columns))
future_production_g16 = model_g16.predict(future_dates_scaled)
future_production_g4 = model_g4.predict(future_dates_scaled)

# Print future predictions for both meters
print('Future Dates: ', future_dates)
print('G-1.6 - Predicted Production: ', future_production_g16)
print('G-4 - Predicted Production: ', future_production_g4)

def _scaled_ordinals(scaler, ordinals):
    """Run date ordinals through ``scaler`` as a single-column feature matrix.

    When the scaler recorded feature names during fitting (``feature_names_in_``),
    the ordinals are passed as a DataFrame with those names so scikit-learn does
    not warn about a name mismatch; otherwise a plain 2-D array is passed.
    """
    features = pd.DataFrame([[ordinal] for ordinal in ordinals])
    fitted_names = getattr(scaler, 'feature_names_in_', None)
    if fitted_names is None:
        return scaler.transform(features.to_numpy())
    features.columns = list(fitted_names)
    return scaler.transform(features)


# Function to predict production for a given start date and time period
def predict_production_by_period(model, scaler, start_date, period, working_days_per_month=25):
    """Estimate total production over ``period`` starting at ``start_date``.

    Args:
        model: Fitted regressor exposing ``predict``; it receives the output of
            ``scaler.transform``.
        scaler: Fitted transformer exposing ``transform`` for date ordinals.
        start_date: Anything ``pd.Timestamp`` accepts, e.g. ``'2024-08-01'``.
        period: ``'<count> <unit>'`` with unit day(s), month(s) or year(s),
            e.g. ``'15 days'``, ``'1 month'``, ``'2 years'``. Letter case and
            surrounding whitespace are ignored.
        working_days_per_month: Production days assumed per month when the
            period is given in months or years.

    Returns:
        float: For days, the sum of the daily predictions over ``count``
        consecutive calendar days from ``start_date``. For months and years,
        the daily prediction at ``start_date`` multiplied by the number of
        working days in the period (the rate is held constant).

    Raises:
        ValueError: If the period is malformed, the count is not a
            non-negative integer, or the unit is not supported.
    """
    unsupported = "Unsupported time period. Please specify time in days, months, or years."

    tokens = str(period).strip().lower().split()
    if len(tokens) < 2:
        raise ValueError(unsupported)
    count = int(tokens[0])  # raises ValueError for a non-integer count
    unit = tokens[-1]
    if count < 0:
        raise ValueError("Time period must not be negative.")

    if unit in ('day', 'days'):
        if count == 0:
            return 0.0  # nothing to predict; also avoids scaling an empty matrix
        future_dates = pd.date_range(start=start_date, periods=count, freq='D')
        daily_predictions = model.predict(
            _scaled_ordinals(scaler, [day.toordinal() for day in future_dates]))
        return float(daily_predictions.sum())

    if unit in ('month', 'months'):
        working_days = working_days_per_month * count
    elif unit in ('year', 'years'):
        working_days = working_days_per_month * 12 * count
    else:
        raise ValueError(unsupported)

    # Months/years: take the daily rate predicted for the start date and scale it.
    # .item() unwraps the one-element prediction so every branch returns a float.
    start_date_ordinal = pd.Timestamp(start_date).toordinal()
    daily_rate = model.predict(_scaled_ordinals(scaler, [start_date_ordinal])).item()
    return float(daily_rate * working_days)

# Example usage: ask for a horizon and report the estimated demand per meter type.
forecast_start = '2024-08-01'
try:
    period = input("Enter time period (e.g., '15 days', '3 months', '2 years'): ")

    predicted_demand_1_6 = predict_production_by_period(
        model_g16, scaler, forecast_start, period)
    print("Predicted Demand (G-1.6):", predicted_demand_1_6)

    predicted_demand_4 = predict_production_by_period(
        model_g4, scaler, forecast_start, period)
    print("Predicted Demand (G-4):", predicted_demand_4)
except ValueError as period_error:
    # A malformed or unsupported period is reported instead of ending in a traceback
    print(period_error)




Combined R-squared: 0.8616035808626418
Combined Mean Absolute Error: 105.8382666541378
Future Dates:  DatetimeIndex(['2024-08-01', '2024-08-02', '2024-08-03', '2024-08-04',
               '2024-08-05'],
              dtype='datetime64[ns]', freq='D')
G-1.6 - Predicted Production:  [690.7275482  690.6861295  690.6447037  690.60327079 690.56183078]
G-4 - Predicted Production:  [575.14751608 575.00800865 574.86855515 574.7291556  574.58981001]
Enter time period (e.g., '15 days', '3 months', '2 years'): 1 years
Predicted Demand (G-1.6): [207218.26445916]
Predicted Demand (G-4): [172544.25482502]




In [38]:
import pandas as pd
from tabulate import tabulate
from colorama import Fore, Style

# Item numbers from the image
item_numbers = [51042683, 51042363, 51042203, 52010403, 51042823]

# Sample quantity for each item, in the same order as item_numbers
sample_quantities = [1, 2, 5, 4, 0.000444]

# Sample item table: placeholder descriptions derived from the item numbers,
# every item measured in pieces
df_v3 = pd.DataFrame({
    'Item_number': item_numbers,
    'Item_desc': [f"Item_{number}" for number in item_numbers],
    'Unit_of_measure': ['pcs' for _ in item_numbers],
    'Quantity': sample_quantities,
})

# Function to calculate quantity of items for given quantity of meter production
def calculate_items_quantity(df, meter_production):
    """Return the item quantities needed for ``meter_production`` meters.

    Args:
        df: DataFrame with 'Item_number', 'Item_desc', 'Unit_of_measure' and
            'Quantity' columns. 'Quantity' is multiplied by the production
            figure, i.e. it is treated as the quantity per meter.
        meter_production: Number of meters to be produced.

    Returns:
        A new DataFrame with columns 'Item_number', 'Item_desc',
        'Unit_of_measure' and 'Quantity_needed'. The input frame is not
        modified, so calling this repeatedly has no side effects on it.
    """
    # assign() works on a copy, leaving the caller's frame untouched
    with_needed = df.assign(Quantity_needed=df['Quantity'] * meter_production)
    return with_needed[['Item_number', 'Item_desc', 'Unit_of_measure', 'Quantity_needed']]

# Example usage: number of meters to plan for (change as needed).
# NOTE(review): this value equals the 1-year G-4 demand printed by the SVR cell,
# rounded to two decimals - presumably copied by hand; confirm.
meter_production_input = 172544.25

# Item quantities required for that production volume
result_df = calculate_items_quantity(df_v3, meter_production_input)

# Blue heading followed by the quantities rendered as a boxed table
heading = f"{Fore.BLUE}Quantity of items for meter production = {meter_production_input}:{Style.RESET_ALL}"
print(heading)
table = tabulate(
    result_df,
    headers='keys',
    tablefmt='fancy_grid',
    showindex=False,
)
print(table)


[34mQuantity of items for meter production = 172544.25:[0m
╒═══════════════╤═══════════════╤═══════════════════╤═══════════════════╕
│   Item_number │ Item_desc     │ Unit_of_measure   │   Quantity_needed │
╞═══════════════╪═══════════════╪═══════════════════╪═══════════════════╡
│      51042683 │ Item_51042683 │ pcs               │       172544      │
├───────────────┼───────────────┼───────────────────┼───────────────────┤
│      51042363 │ Item_51042363 │ pcs               │       345088      │
├───────────────┼───────────────┼───────────────────┼───────────────────┤
│      51042203 │ Item_51042203 │ pcs               │       862721      │
├───────────────┼───────────────┼───────────────────┼───────────────────┤
│      52010403 │ Item_52010403 │ pcs               │       690177      │
├───────────────┼───────────────┼───────────────────┼───────────────────┤
│      51042823 │ Item_51042823 │ pcs               │           76.6096 │
╘═══════════════╧═══════════════╧══════════════════

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Feed-forward neural network for G-1.6 daily production.
# Requires `df_daily_25` from the daily-expansion cell.
from sklearn.preprocessing import StandardScaler  # imported here so the cell runs on its own

# Encode each date as its integer ordinal so it can serve as a numeric feature
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

X = df_daily_25[['Date_ordinal']]
y = df_daily_25['Daily Production (G-1.6)']

# NOTE(review): the split is shuffled, and all days of one month share a single
# value, so the scores below measure interpolation, not forecasting skill.
# Consider a chronological split (shuffle=False) - TODO confirm intent.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the date ordinal using training rows only. The raw ordinals are
# around 7e5, a scale gradient-based training copes with poorly; the tuned NN
# cell later in this notebook standardizes its input as well.
nn_scaler = StandardScaler()
X_train_scaled = nn_scaler.fit_transform(X_train)
X_test_scaled = nn_scaler.transform(X_test)

# Define the NN model
model = Sequential([
    Dense(64, activation='relu', input_shape=(1,)),  # Example: 1 input feature, 64 neurons in the first hidden layer
    Dense(32, activation='relu'),  # Example: 32 neurons in the second hidden layer
    Dense(1)  # Output layer (1 neuron for regression)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, verbose=1)  # Example: training for 50 epochs, batch size of 32

# Predict on the test set
y_pred = model.predict(X_test_scaled)

# Evaluate the model
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f'R-squared: {r2}')
print(f'Mean Squared Error: {mse}')

# Predict future production (freq='M' produces month-end dates); the future
# dates go through the same scaler as the training data
future_dates = pd.date_range(start='2024-06-01', periods=5, freq='M')
future_dates_ordinal = future_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1)
future_dates_scaled = nn_scaler.transform(pd.DataFrame(future_dates_ordinal, columns=X.columns))
future_production = model.predict(future_dates_scaled)

# Print future predictions
print('Future Dates: ', future_dates)
print('Predicted Production (G-1.6): ', future_production.flatten())


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Build the date feature and the two per-meter targets from `df_daily_25`
# (created by the daily-expansion cell), then hold out 20% of the rows.
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

# Single-column feature matrix plus one target Series per meter type
feature_columns = ['Date_ordinal']
X = df_daily_25[feature_columns]
y_g16, y_g4 = (
    df_daily_25[target]
    for target in ('Daily Production (G-1.6)', 'Daily Production (G-4)')
)

# One call splits the features and both targets on the same rows
split_parts = train_test_split(X, y_g16, y_g4, test_size=0.2, random_state=42)
X_train, X_test, y_train_g16, y_test_g16, y_train_g4, y_test_g4 = split_parts

# Define the NN model
def create_model():
    """Build and compile a one-input regression network (64 -> 32 -> 1 units).

    Returns:
        A compiled Keras ``Sequential`` model using the Adam optimizer and
        mean-squared-error loss.
    """
    network = Sequential()
    network.add(Dense(64, activation='relu', input_shape=(1,)))  # first hidden layer, 1 input feature
    network.add(Dense(32, activation='relu'))  # second hidden layer
    network.add(Dense(1))  # single linear output for regression
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

# Train one network per meter type, score both on the held-out rows and forecast.
# Requires X, X_train, X_test, the y_* splits and create_model() from earlier
# in this cell.
from sklearn.preprocessing import StandardScaler  # imported here so the cell runs on its own

# Standardize the date ordinal using training rows only. The raw ordinals are
# around 7e5, a scale gradient-based training copes with poorly; the tuned NN
# cell later in this notebook standardizes its input as well.
nn_scaler = StandardScaler()
X_train_scaled = nn_scaler.fit_transform(X_train)
X_test_scaled = nn_scaler.transform(X_test)

# Train the NN models for both meters
model_g16 = create_model()
model_g4 = create_model()
model_g16.fit(X_train_scaled, y_train_g16, epochs=50, batch_size=32, verbose=1)
model_g4.fit(X_train_scaled, y_train_g4, epochs=50, batch_size=32, verbose=1)

# Predict on the test set for both meters
y_pred_g16 = model_g16.predict(X_test_scaled)
y_pred_g4 = model_g4.predict(X_test_scaled)

# Pool actual values and predictions of both meters in the same order (G-1.6
# first, then G-4). flatten() turns the (n, 1) network output into 1-D before
# it is wrapped in a Series.
y_test_combined = pd.concat([y_test_g16, y_test_g4])
y_pred_combined = pd.concat([pd.Series(y_pred_g16.flatten()), pd.Series(y_pred_g4.flatten())])

# Evaluate the combined model
r2_combined = r2_score(y_test_combined, y_pred_combined)
mae_combined = mean_absolute_error(y_test_combined, y_pred_combined)

print(f'Combined R-squared: {r2_combined}')
print(f'Combined Mean Absolute Error: {mae_combined}')

# Predict future production for both meters (freq='M' produces month-end
# dates); the future dates go through the same scaler as the training data
future_dates = pd.date_range(start='2024-06-01', periods=5, freq='M')
future_dates_ordinal = future_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1)
future_dates_scaled = nn_scaler.transform(pd.DataFrame(future_dates_ordinal, columns=X.columns))
future_production_g16 = model_g16.predict(future_dates_scaled)
future_production_g4 = model_g4.predict(future_dates_scaled)

# Print future predictions for both meters
print('Future Dates: ', future_dates)
print('G-1.6 - Predicted Production: ', future_production_g16.flatten())
print('G-4 - Predicted Production: ', future_production_g4.flatten())


# Neural network (NN) with tuning: feature scaling, batch normalization, dropout, and early stopping

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler

# Assumes df_daily_25 (the daily production DataFrame) already exists from an earlier cell.
# Make sure your DataFrame is loaded correctly if it's not in the current environment

# Prepare data for the model: encode each date as an integer ordinal so it can be used
# as the single numeric input feature.
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

# One shared feature matrix, one target Series per meter type
X = df_daily_25[['Date_ordinal']]
y_g16 = df_daily_25['Daily Production (G-1.6)']
y_g4 = df_daily_25['Daily Production (G-4)']

# Split BEFORE scaling so the scaler never sees the test rows. Passing both targets to a
# single train_test_split call keeps their rows aligned with the same feature rows.
X_train_raw, X_test_raw, y_train_g16, y_test_g16, y_train_g4, y_test_g4 = train_test_split(
    X, y_g16, y_g4, test_size=0.2, random_state=42)

# Normalize the features: fit the mean/std on the training rows only, then apply the
# same transform to the test rows (avoids leaking test statistics into training).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled feature matrix, kept for any later code that expects this name
X_scaled = scaler.transform(X)

# Define the NN model
def create_model(input_dim=1, dropout_rate=0.3):
    """Build and compile a fully connected regression network.

    The network stacks three ReLU hidden layers (128, 64 and 32 units), each followed
    by batch normalization and dropout, and ends in a single linear output unit.
    It is compiled with the Adam optimizer and mean-squared-error loss.

    Parameters
    ----------
    input_dim : int, default 1
        Number of input features per sample.
    dropout_rate : float, default 0.3
        Dropout rate applied after each hidden layer.

    Returns
    -------
    A compiled, untrained Keras ``Sequential`` model.
    """
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_dim,)),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(32, activation='relu'),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(1)  # Output layer (1 neuron for regression)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Early stopping: stop once the validation loss has not improved for 10 epochs and
# restore the best weights seen. The same callback object is passed to both fits below.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train one network per meter type; 20% of the training rows are held out as the
# validation set that early stopping monitors.
model_g16 = create_model()
model_g4 = create_model()
history_g16 = model_g16.fit(X_train, y_train_g16, epochs=200, batch_size=16, verbose=1, validation_split=0.2, callbacks=[early_stopping])
history_g4 = model_g4.fit(X_train, y_train_g4, epochs=200, batch_size=16, verbose=1, validation_split=0.2, callbacks=[early_stopping])

# Predict on the test set for both meters
y_pred_g16 = model_g16.predict(X_test)
y_pred_g4 = model_g4.predict(X_test)

# Pool both meters into a single sample. Actuals and predictions are concatenated in the
# same order (G-1.6 first, then G-4); the sklearn metrics compare values by position.
y_test_combined = pd.concat([y_test_g16, y_test_g4])
y_pred_combined = pd.concat([pd.Series(y_pred_g16.flatten()), pd.Series(y_pred_g4.flatten())])

# Evaluate the pooled predictions
r2_combined = r2_score(y_test_combined, y_pred_combined)
mae_combined = mean_absolute_error(y_test_combined, y_pred_combined)

print(f'Combined R-squared: {r2_combined}')
print(f'Combined Mean Absolute Error: {mae_combined}')

# Predict future production for both meters at five month-end dates.
# An explicit MonthEnd offset is used instead of the 'M' alias, which newer pandas
# releases deprecate in favour of 'ME'; the offset object works on old and new versions.
future_dates = pd.date_range(start='2024-06-01', periods=5, freq=pd.offsets.MonthEnd())
# Future ordinals go through the same fitted scaler as the training features
future_dates_ordinal = scaler.transform(future_dates.map(pd.Timestamp.toordinal).values.reshape(-1, 1))
future_production_g16 = model_g16.predict(future_dates_ordinal)
future_production_g4 = model_g4.predict(future_dates_ordinal)

# Print future predictions for both meters
print('Future Dates: ', future_dates)
print('G-1.6 - Predicted Production: ', future_production_g16.flatten())
print('G-4 - Predicted Production: ', future_production_g4.flatten())


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Local import: this cell's own import list does not include the scaler.
from sklearn.preprocessing import StandardScaler

# Assumes df_daily_25 (the daily production DataFrame) already exists from an earlier cell.
# Make sure your DataFrame is loaded correctly if it's not in the current environment

# Prepare data for the model: encode each date as an integer ordinal
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

# Split data into training and testing sets
X = df_daily_25[['Date_ordinal']]
y = df_daily_25['Daily Production (G-1.6)']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature. Raw ordinals are roughly 7e5 for these dates, far outside the
# range where the LSTM's default tanh/sigmoid activations respond, so the network cannot
# learn from them. The scaler is fitted on the training rows only and reused for the
# test rows and the future dates.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape data for LSTM input: [samples, time steps, features]
# NOTE(review): with a single time step the LSTM sees no sequence history per sample.
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Define the LSTM model
model = Sequential([
    LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2])),  # Example: 50 LSTM units
    Dense(1)  # Output layer (1 neuron for regression)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1)  # Example: training for 50 epochs, batch size of 32

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f'R-squared: {r2}')
print(f'Mean Squared Error: {mse}')

# Predict future production at five month-end dates.
# An explicit MonthEnd offset is used instead of the 'M' alias, which newer pandas
# releases deprecate in favour of 'ME'; the offset object works on old and new versions.
future_dates = pd.date_range(start='2024-06-01', periods=5, freq=pd.offsets.MonthEnd())
# Build a frame with the training column name so the fitted scaler accepts it as-is,
# then reshape to the [samples, time steps, features] layout the model was trained on.
future_ordinals = pd.DataFrame({'Date_ordinal': future_dates.map(pd.Timestamp.toordinal)})
future_dates_ordinal = scaler.transform(future_ordinals).reshape((-1, 1, 1))
future_production = model.predict(future_dates_ordinal)

# Print future predictions
print('Future Dates: ', future_dates)
print('Predicted Production (G-1.6): ', future_production.flatten())


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Local import: this cell's own import list does not include the scaler.
from sklearn.preprocessing import StandardScaler

# Assumes df_daily_25 (the daily production DataFrame) already exists from an earlier cell.
# Make sure your DataFrame is loaded correctly if it's not in the current environment

# Prepare data for the model: encode each date as an integer ordinal
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

# One shared feature matrix, one target Series per meter type
X = df_daily_25[['Date_ordinal']]
y_g16 = df_daily_25['Daily Production (G-1.6)']
y_g4 = df_daily_25['Daily Production (G-4)']

# Split data into training and testing sets for both meters (one call keeps rows aligned)
X_train, X_test, y_train_g16, y_test_g16, y_train_g4, y_test_g4 = train_test_split(
    X, y_g16, y_g4, test_size=0.2, random_state=42)

# Standardize the feature. Raw ordinals are roughly 7e5 for these dates, far outside the
# range where the LSTM's default tanh/sigmoid activations respond, so the network cannot
# learn from them. The scaler is fitted on the training rows only and reused for the
# test rows and the future dates.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reshape data for LSTM input: [samples, time steps, features]
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Define the LSTM model
def create_lstm_model(input_shape=None):
    """Build and compile a single-layer LSTM regression model.

    Parameters
    ----------
    input_shape : tuple of int, optional
        ``(time steps, features)`` of one sample. When omitted, it is taken from the
        notebook-level ``X_train`` array, as the original definition did.

    Returns
    -------
    A compiled, untrained Keras ``Sequential`` model (50 LSTM units, one linear output,
    Adam optimizer, mean-squared-error loss).
    """
    if input_shape is None:
        input_shape = (X_train.shape[1], X_train.shape[2])
    model = Sequential([
        LSTM(50, input_shape=input_shape),  # Example: 50 LSTM units
        Dense(1)  # Output layer (1 neuron for regression)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Train the LSTM models for both meters
model_g16 = create_lstm_model()
model_g4 = create_lstm_model()

model_g16.fit(X_train, y_train_g16, epochs=50, batch_size=32, verbose=1)
model_g4.fit(X_train, y_train_g4, epochs=50, batch_size=32, verbose=1)

# Predict on the test set for both meters
y_pred_g16 = model_g16.predict(X_test)
y_pred_g4 = model_g4.predict(X_test)

# Pool both meters into a single sample. Actuals and predictions are concatenated in the
# same order (G-1.6 first, then G-4); the sklearn metrics compare values by position.
y_test_combined = pd.concat([y_test_g16, y_test_g4])
y_pred_combined = pd.concat([pd.Series(y_pred_g16.flatten()), pd.Series(y_pred_g4.flatten())])

# Evaluate the pooled predictions
r2_combined = r2_score(y_test_combined, y_pred_combined)
mae_combined = mean_absolute_error(y_test_combined, y_pred_combined)

print(f'Combined R-squared: {r2_combined}')
print(f'Combined Mean Absolute Error: {mae_combined}')

# Predict future production for both meters at five month-end dates.
# An explicit MonthEnd offset is used instead of the 'M' alias, which newer pandas
# releases deprecate in favour of 'ME'; the offset object works on old and new versions.
future_dates = pd.date_range(start='2024-06-01', periods=5, freq=pd.offsets.MonthEnd())
# Build a frame with the training column name so the fitted scaler accepts it as-is,
# then reshape to the [samples, time steps, features] layout the models were trained on.
future_ordinals = pd.DataFrame({'Date_ordinal': future_dates.map(pd.Timestamp.toordinal)})
future_dates_ordinal = scaler.transform(future_ordinals).reshape((-1, 1, 1))
future_production_g16 = model_g16.predict(future_dates_ordinal)
future_production_g4 = model_g4.predict(future_dates_ordinal)

# Print future predictions for both meters
print('Future Dates: ', future_dates)
print('G-1.6 - Predicted Production: ', future_production_g16.flatten())
print('G-4 - Predicted Production: ', future_production_g4.flatten())


# LSTM with tuning: feature scaling, stacked LSTM layers, dropout, and early stopping

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Assumes df_daily_25 (the daily production DataFrame) already exists from an earlier cell.
# Make sure your DataFrame is loaded correctly if it's not in the current environment

# Prepare data for the model: encode each date as an integer ordinal so it can be used
# as the single numeric input feature.
df_daily_25['Date_ordinal'] = df_daily_25['Date'].map(pd.Timestamp.toordinal)

# One shared feature matrix, one target Series per meter type
X = df_daily_25[['Date_ordinal']]
y_g16 = df_daily_25['Daily Production (G-1.6)']
y_g4 = df_daily_25['Daily Production (G-4)']

# Split BEFORE scaling so the scaler never sees the test rows. Passing both targets to a
# single train_test_split call keeps their rows aligned with the same feature rows.
X_train_raw, X_test_raw, y_train_g16, y_test_g16, y_train_g4, y_test_g4 = train_test_split(
    X, y_g16, y_g4, test_size=0.2, random_state=42)

# Normalize the features: fit the mean/std on the training rows only, then apply the
# same transform to the test rows (avoids leaking test statistics into training).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled feature matrix, kept for any later code that expects this name
X_scaled = scaler.transform(X)

# Reshape data for LSTM input: [samples, time steps, features]
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))

# Define the LSTM model
def create_lstm_model(input_shape=None, dropout_rate=0.2):
    """Build and compile a two-layer stacked LSTM regression model.

    The first LSTM layer (100 units) returns its full output sequence so the second
    LSTM layer (50 units) can consume it; dropout follows each LSTM layer and a single
    linear unit produces the prediction. Compiled with Adam and mean-squared-error loss.

    Parameters
    ----------
    input_shape : tuple of int, optional
        ``(time steps, features)`` of one sample. When omitted, it is taken from the
        notebook-level ``X_train`` array, as the original definition did.
    dropout_rate : float, default 0.2
        Dropout rate applied after each LSTM layer.

    Returns
    -------
    A compiled, untrained Keras ``Sequential`` model.
    """
    if input_shape is None:
        input_shape = (X_train.shape[1], X_train.shape[2])
    model = Sequential([
        LSTM(100, return_sequences=True, input_shape=input_shape),  # Increased LSTM units
        Dropout(dropout_rate),  # Dropout layer
        LSTM(50),  # Additional LSTM layer
        Dropout(dropout_rate),  # Dropout layer
        Dense(1)  # Output layer
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# Build one LSTM model per meter type
model_g16 = create_lstm_model()
model_g4 = create_lstm_model()

# Early stopping: stop once the validation loss has not improved for 10 epochs and
# restore the best weights seen. The same callback object is passed to both fits below.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# 20% of the training rows are held out as the validation set that early stopping monitors
model_g16.fit(X_train, y_train_g16, epochs=150, batch_size=32, verbose=1, validation_split=0.2, callbacks=[early_stopping])
model_g4.fit(X_train, y_train_g4, epochs=150, batch_size=32, verbose=1, validation_split=0.2, callbacks=[early_stopping])

# Predict on the test set for both meters
y_pred_g16 = model_g16.predict(X_test)
y_pred_g4 = model_g4.predict(X_test)

# Pool both meters into a single sample (G-1.6 first, then G-4 in both Series);
# the sklearn metrics compare values by position.
y_test_combined = pd.concat([y_test_g16.reset_index(drop=True), y_test_g4.reset_index(drop=True)])
y_pred_combined = pd.concat([pd.Series(y_pred_g16.flatten()), pd.Series(y_pred_g4.flatten())])

# Evaluate the pooled predictions
r2_combined = r2_score(y_test_combined, y_pred_combined)
mae_combined = mean_absolute_error(y_test_combined, y_pred_combined)

print(f'Combined R-squared: {r2_combined}')
print(f'Combined Mean Absolute Error: {mae_combined}')

# Predict future production for both meters at five month-end dates.
# An explicit MonthEnd offset is used instead of the 'M' alias, which newer pandas
# releases deprecate in favour of 'ME'; the offset object works on old and new versions.
future_dates = pd.date_range(start='2024-06-01', periods=5, freq=pd.offsets.MonthEnd())
# Scale with the already-fitted scaler, then reshape to [samples, time steps, features]
future_dates_ordinal = scaler.transform(future_dates.map(pd.Timestamp.toordinal).values.reshape((-1, 1)))
future_dates_ordinal = future_dates_ordinal.reshape((-1, 1, 1))
future_production_g16 = model_g16.predict(future_dates_ordinal)
future_production_g4 = model_g4.predict(future_dates_ordinal)

# Print future predictions for both meters
print('Future Dates: ', future_dates)
print('G-1.6 - Predicted Production: ', future_production_g16.flatten())
print('G-4 - Predicted Production: ', future_production_g4.flatten())


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Model names, in the same order as the two metric lists below
models = ['Linear Regression', 'SVR', 'Neural Network', 'LSTM', 'Random Forest', 'KNN']

# MAE values
# NOTE(review): these numbers are hard-coded, presumably copied from earlier cell
# outputs - verify they still match after any re-run.
mae_values = [160.32, 105.83, 348.08, 421.11, 0.0067, 0.062]

# R-squared values
# NOTE(review): magnitudes such as 99.99 suggest these are percentages - confirm.
r_squared_values = [69.11, 86.16, -4.96, -61.86, 99.99, 99.99]

# Width of each bar; the two bars of a model sit side by side
bar_width = 0.35
x_positions = np.arange(len(models))

# Set up the figure with two y-axes sharing one x-axis. MAE and R-squared have
# different units and ranges, so each metric gets its own scale.
fig, ax = plt.subplots(figsize=(10, 6))
ax_r2 = ax.twinx()

# Bar plot for MAE (left axis)
bar1 = ax.bar(x_positions, mae_values, bar_width, label='MAE', color='blue')
# MAE spans about five orders of magnitude; on a linear axis the smallest bars vanish
ax.set_yscale('log')

# Bar plot for R-squared (right axis), with a zero line so negative scores stand out
bar2 = ax_r2.bar(x_positions + bar_width, r_squared_values, bar_width, label='R-squared', color='green')
ax_r2.axhline(0, color='black', linewidth=0.8)

# Centre each tick label between the model's two bars
ax.set_xticks(x_positions + bar_width / 2)
ax.set_xticklabels(models)

# Set labels and title
ax.set_ylabel('MAE (log scale)')
ax_r2.set_ylabel('R-squared')
ax.set_title('MAE and R-squared Comparison of Regression Models')
# One legend for both axes, attached to the top axes so the bars do not cover it
ax_r2.legend(handles=[bar1, bar2])

# Show the plot
plt.show()
