In [12]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller, acf, pacf
from statsmodels.tsa.arima.model import ARIMA
from datetime import datetime
from datetime import datetime
from dateutil.relativedelta import relativedelta
import matplotlib.pyplot as plt 


class ARIMAModel:
    def __init__(self, file, column_index):
        self.dataframe = self.process_dataset(file)
        self.column_index = column_index-1

    def process_dataset(self, file):
        df = pd.read_csv(file)
        df.iloc[:, 0] = pd.to_datetime(df.iloc[:, 0])
        for i in range(1, 5):
            if df.iloc[:, i].dtype == 'object':
                df.iloc[:, i] = df.iloc[:, i].str.replace(',', '')
            df.iloc[:, i] = df.iloc[:, i].astype(float)
        df.set_index(df.columns[0], inplace=True)
        return df

    def stationarity_tests(self, timeseries):
        result_adf_trend = adfuller(timeseries, autolag='AIC')
        adf_pvalue_trend = result_adf_trend[1]

        num_diff_trend = 0
        while adf_pvalue_trend > 0.05:
            timeseries = timeseries.diff().dropna()
            result_adf_trend = adfuller(timeseries, autolag='AIC')
            adf_pvalue_trend = result_adf_trend[1]
            num_diff_trend += 1

        return timeseries, num_diff_trend

    def print_acf_pacf_lags(self, timeseries, trend_diff):
        acf_orig = acf(timeseries, fft=True, nlags=20)
        pacf_orig = pacf(timeseries, nlags=20)

        acf_diff_trend = acf(timeseries.diff().dropna(), fft=True, nlags=20)
        pacf_diff_trend = pacf(timeseries.diff().dropna(), nlags=20)

        def find_significant_lags(acf_values):
            conf_interval = 1.96 / len(timeseries)**0.5
            return [i for i, val in enumerate(acf_values) if abs(val) > conf_interval]

        return [find_significant_lags(acf_orig), find_significant_lags(pacf_orig),
                find_significant_lags(acf_diff_trend), find_significant_lags(pacf_diff_trend)]

    def get_PDQ(self, my_list):
        break_index = next((i for i, (a, b) in enumerate(zip(my_list, my_list[1:]), start=1) if b != a + 1), None)
        if break_index is not None:
            return len(my_list[:break_index])
        else:
            return 0

    def train_arima_model(self):
        timeseries, trend_diff = self.stationarity_tests(self.dataframe.iloc[:, self.column_index])
        lags_data = self.print_acf_pacf_lags(timeseries, trend_diff)

        P = []
        Q = []

        if trend_diff != 0:
            P = lags_data[3]
            Q = lags_data[2]
        else:
            P = lags_data[1]
            Q = lags_data[0]

        if 0 in P:
            P.remove(0)

        if 0 in Q:
            Q.remove(0)

        p = self.get_PDQ(P)
        q = self.get_PDQ(Q)
        d = trend_diff

        new_df = pd.DataFrame({self.dataframe.columns[self.column_index]: self.dataframe.iloc[:, self.column_index].values},
                              index=self.dataframe.index)
        
        train_size = int(0.6 * len(new_df))
        train_df, _ = new_df[:train_size], new_df[train_size:]
        print(train_df.index[-1])

        arima_model = ARIMA(train_df[train_df.columns[0]], order=(p, d, q)).fit()

        return arima_model, train_df

    def convert_to_datetime(self, year, quarter):
        # Map quarter to the corresponding month
        year = int(year)
        quarter = int(quarter[-1])
        month = (quarter - 1) * 3 + 1
        
        # Create a datetime object for the first day of the quarter
        datetime_object = datetime(year, month, 1, 0, 0, 0)
        
        return datetime_object

    def calculate_month_difference(self, datetime1, datetime2):
        difference = relativedelta(datetime2, datetime1)
        months_difference = difference.years * 12 + difference.months
        return months_difference
    
    def forecast_data(self, year, quarter):
        datetime2 = self.convert_to_datetime(year, quarter)
        trained_model, train_df = self.train_arima_model()
        result = int(self.calculate_month_difference(train_df.index[-1], datetime2)/3)
        future_predictions = trained_model.forecast(result)
        value = future_predictions.iloc[-1]
        present_value = self.dataframe[self.dataframe.columns[self.column_index]][-1]
        perc_change = ((value - present_value) / present_value) * 100
        if perc_change > 0:
            perc_change = f"+{perc_change:.2f}%"
        else:
            perc_change = f"{perc_change:.2f}%"
        return value.round(3), perc_change
    
    def plot_data_with_prediction(self, target_index, xl, yl, year, quarter, predicted_value):
        # Use a dark theme
        plt.style.use('dark_background')

        # Plot historical data
        plt.figure(figsize=(10, 6))
        plt.plot(self.dataframe.index, self.dataframe[self.dataframe.columns[target_index]], label='Historical Data', marker='o', color='lightblue', linestyle='-')

        future_date = self.convert_to_datetime(year, quarter)

        plt.scatter(future_date, predicted_value, color='red', label='Predicted Future Value', zorder=5)

        # Annotate the predicted value on the plot
        plt.text(future_date, predicted_value, f'{predicted_value:.2f}', color='red', ha='left', va='bottom', fontsize=10, bbox=dict(facecolor='black', edgecolor='none', boxstyle='round,pad=0.3'))

        # Customize plot aesthetics
        plt.title(xl, fontsize=14, color='white')
        plt.xlabel('Year', fontsize=12, color='white')
        plt.ylabel(yl, fontsize=12, color='white')
        plt.legend(fontsize=10)
        plt.grid(True, color='gray', linestyle='--', alpha=0.5)

        # Customize tick parameters
        plt.tick_params(axis='both', which='both', colors='white')
        return plt.gcf()

    def create_and_save_plot(self, target_index, xl, yl, save_path, year, quarter, predicted_value):
        plot = self.plot_data_with_prediction(target_index, xl, yl, year=year, quarter=quarter, predicted_value=predicted_value)
        plot.savefig(save_path)
        plt.close() 


In [2]:
arima_model_instance = ARIMAModel("./Datasets/Agriculture/Agriculture Overall.csv", 1)
trained_model = arima_model_instance.train_arima_model()
forec, change= arima_model_instance.forecast_data('2015', 'Q3')
forec

2014-10-01 00:00:00


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'


2014-10-01 00:00:00




2300.889

In [3]:
file = "./Datasets/Agriculture/Agriculture Overall.csv"

In [4]:
features = pd.read_csv(file).columns

In [6]:
year = '2018'
quarter = 'Q1'

In [13]:
predictions = {f'{features[1]}': [0, 0], f'{features[2]}': [0, 0], f'{features[3]}': [0, 0], f'{features[4]}': [0, 0]}
imgs_names = ['Production (Number)','Economy (Revenues)','Employment','GDP Contribution']
for i in range(1, 5):
    arima_model_instance = ARIMAModel("./Datasets/Agriculture/Agriculture Overall.csv", i)
    predicted_value, perc = arima_model_instance.forecast_data(year, quarter)
    key = list(predictions.keys())[i-1]
    predictions[key][0] = predicted_value
    predictions[key][1] = perc
    # xl = f'Hello {key} Forecast Plot'
    # yl = f'{key}'
    # arima_model_instance.create_and_save_plot(target_index=i-1, xl=xl, yl=yl, save_path=f'EconVisor---Industry-Forecasting-Tool/Static/ReportPlots/{imgs_names[i-1]}.png', year=year, quarter=quarter, predicted_value=predicted_value)

2014-10-01 00:00:00


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'


2014-10-01 00:00:00


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'


2014-10-01 00:00:00
2014-10-01 00:00:00


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'


In [14]:
predictions

{'Total Agricultural Production (in million tonnes)': [146.674, '+25.81%'],
 'Total Agricultural Revenues (in billion INR)': [2835.536, '-62.75%'],
 'Employment (in million people)': [58.95, '-5.07%'],
 'GDP Contribution Percentage from Agriculture': [15.266, '+8.27%']}

## Using LSTM:

In [1]:
import pandas as pd

In [23]:
file = './Datasets/Agriculture/Agriculture Overall.csv'

In [19]:
df = pd.read_csv(file)
df.iloc[:, 0] = pd.to_datetime(df.iloc[:, 0])
for i in range(1, 5):
    if df.iloc[:, i].dtype == 'object':
        df.iloc[:, i] = df.iloc[:, i].str.replace(',', '')
    df.iloc[:, i] = df.iloc[:, i].astype(float)
df.set_index(df.columns[0], inplace=True)

  df.iloc[:, 0] = pd.to_datetime(df.iloc[:, 0])


In [20]:
df.head()


Unnamed: 0_level_0,Total Agricultural Production (in million tonnes),Total Agricultural Revenues (in billion INR),Employment (in million people),GDP Contribution Percentage from Agriculture
Quarterly Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-01-01,92.17,1610.2,54.2,18.4
2003-04-01,94.55,1646.8,54.3,18.2
2003-07-01,81.16,1485.0,54.4,17.9
2003-10-01,86.77,1561.1,54.5,18.1
2004-01-01,95.21,1697.5,54.6,18.3


In [7]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib.pyplot as plt

In [9]:
from statsmodels.tsa.stattools import adfuller, kpss

In [21]:
df = df.iloc[:, 2]

In [80]:
df = pd.read_csv(file)
df.iloc[:, 0] = pd.to_datetime(df.iloc[:, 0])
for i in range(1, 5):
    if df.iloc[:, i].dtype == 'object':
        df.iloc[:, i] = df.iloc[:, i].str.replace(',', '')
    df.iloc[:, i] = df.iloc[:, i].astype(float)
df.set_index(df.columns[0], inplace=True)
# df = df.iloc[:, [1]]
df.head()

  df.iloc[:, 0] = pd.to_datetime(df.iloc[:, 0])


Unnamed: 0_level_0,Total Agricultural Production (in million tonnes),Total Agricultural Revenues (in billion INR),Employment (in million people),GDP Contribution Percentage from Agriculture
Quarterly Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-01-01,92.17,1610.2,54.2,18.4
2003-04-01,94.55,1646.8,54.3,18.2
2003-07-01,81.16,1485.0,54.4,17.9
2003-10-01,86.77,1561.1,54.5,18.1
2004-01-01,95.21,1697.5,54.6,18.3


In [38]:
df.set_index(df.columns[0], inplace=True)

In [44]:
df.head()

Unnamed: 0_level_0,Total Agricultural Revenues (in billion INR)
Quarterly Year,Unnamed: 1_level_1
2003-01-01,1610.2
2003-04-01,1646.8
2003-07-01,1485.0
2003-10-01,1561.1
2004-01-01,1697.5


In [73]:


# Extract the 'Production' column as a 1D array
production_values = df[df.columns[1]].values.reshape(-1, 1)

# Normalize the data
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(production_values)

# Create sequences and labels
def create_sequences(data, seq_length):
    sequences = []
    labels = []
    for i in range(len(data) - seq_length):
        seq = data[i:i+seq_length]
        label = data[i+seq_length:i+seq_length+1]
        sequences.append(seq)
        labels.append(label)
    return np.array(sequences), np.array(labels)

seq_length = 3  # You can adjust this based on your needs
X, y = create_sequences(df_scaled, seq_length)

# Reshape for LSTM input (samples, time steps, features)
X = X.reshape((X.shape[0], X.shape[1], 1))


In [None]:
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(seq_length, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

model.fit(X, y, epochs=100, batch_size=1, verbose=2)


Epoch 1/100
78/78 - 1s - loss: 0.1786 - 1s/epoch - 15ms/step
Epoch 2/100
78/78 - 0s - loss: 0.1002 - 133ms/epoch - 2ms/step
Epoch 3/100
78/78 - 0s - loss: 0.0483 - 134ms/epoch - 2ms/step
Epoch 4/100
78/78 - 0s - loss: 0.0219 - 135ms/epoch - 2ms/step
Epoch 5/100
78/78 - 0s - loss: 0.0169 - 136ms/epoch - 2ms/step
Epoch 6/100
78/78 - 0s - loss: 0.0160 - 140ms/epoch - 2ms/step
Epoch 7/100
78/78 - 0s - loss: 0.0163 - 158ms/epoch - 2ms/step
Epoch 8/100
78/78 - 0s - loss: 0.0159 - 141ms/epoch - 2ms/step
Epoch 9/100
78/78 - 0s - loss: 0.0166 - 135ms/epoch - 2ms/step
Epoch 10/100
78/78 - 0s - loss: 0.0168 - 153ms/epoch - 2ms/step
Epoch 11/100
78/78 - 0s - loss: 0.0159 - 137ms/epoch - 2ms/step
Epoch 12/100
78/78 - 0s - loss: 0.0158 - 139ms/epoch - 2ms/step
Epoch 13/100
78/78 - 0s - loss: 0.0162 - 143ms/epoch - 2ms/step
Epoch 14/100
78/78 - 0s - loss: 0.0161 - 141ms/epoch - 2ms/step
Epoch 15/100
78/78 - 0s - loss: 0.0153 - 146ms/epoch - 2ms/step
Epoch 16/100
78/78 - 0s - loss: 0.0153 - 152ms/epoc

<keras.src.callbacks.History at 0x29bbc6565e0>

In [None]:
# Predict for the next quarter
future_quarter = 1
future_sequence = df_scaled[-seq_length:]
future_sequence = future_sequence.reshape((1, seq_length, 1))

future_prediction = model.predict(future_sequence)

# Inverse transform the prediction
future_prediction = scaler.inverse_transform(future_prediction)
print("Predicted Production for the next quarter:", future_prediction[0, 0])


Predicted Production for the next quarter: 8289.066


In [None]:
class ARIMAModel:
    def __init__(self, file, column_index):
        self.dataframe = self.process_dataset(file)
        self.column_index = column_index-1

    def process_dataset(self, file):
        df = pd.read_csv(file)
        df.iloc[:, 0] = pd.to_datetime(df.iloc[:, 0])
        for i in range(1, 5):
            if df.iloc[:, i].dtype == 'object':
                df.iloc[:, i] = df.iloc[:, i].str.replace(',', '')
            df.iloc[:, i] = df.iloc[:, i].astype(float)
        df.set_index(df.columns[0], inplace=True)
        return df


    def convert_to_datetime(self, year, quarter):
            # Map quarter to the corresponding month
            year = int(year)
            quarter = int(quarter[-1])
            month = (quarter - 1) * 3 + 1
            
            # Create a datetime object for the first day of the quarter
            datetime_object = datetime(year, month, 1, 0, 0, 0)
            
            return datetime_object

    def calculate_month_difference(self, datetime1, datetime2):
        difference = relativedelta(datetime2, datetime1)
        months_difference = difference.years * 12 + difference.months
        return months_difference
    
    def train_lstm(self):
        
    
    def forecast_data(self, year, quarter):
        datetime2 = self.convert_to_datetime(year, quarter)
        trained_model, train_df = self.train_arima_model()
        result = int(self.calculate_month_difference(train_df.index[-1], datetime2)/3)
        future_predictions = trained_model.forecast(result)
        value = future_predictions.iloc[-1]
        present_value = self.dataframe[self.dataframe.columns[self.column_index]][-1]
        perc_change = ((value - present_value) / present_value) * 100
        if perc_change > 0:
            perc_change = f"+{perc_change:.2f}%"
        else:
            perc_change = f"{perc_change:.2f}%"
        return value.round(3), perc_change
        

In [85]:
df = pd.read_csv(file)
df.iloc[:, 0] = pd.to_datetime(df.iloc[:, 0])
for i in range(1, 5):
    if df.iloc[:, i].dtype == 'object':
        df.iloc[:, i] = df.iloc[:, i].str.replace(',', '')
    df.iloc[:, i] = df.iloc[:, i].astype(float)
df.set_index(df.columns[0], inplace=True)

df = df[df.columns[selectedcolumn-1]].values.reshape(-1, 1)
# Normalize the data
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df)

# Create sequences and labels for predicting 5 quarters later
def create_sequences(data, seq_length, future_steps):
    sequences = []
    labels = []
    for i in range(len(data) - seq_length - future_steps + 1):
        seq = data[i:i+seq_length]
        label = data[i+seq_length:i+seq_length+future_steps]
        sequences.append(seq)
        labels.append(label)
    return np.array(sequences), np.array(labels)

seq_length = 3  # You can adjust this based on your needs
future_steps = 5  # Number of quarters to predict into the future

X, y = create_sequences(df_scaled, seq_length, future_steps)

# Reshape for LSTM input (samples, time steps, features)
X = X.reshape((X.shape[0], X.shape[1], 1))

# Define and train the LSTM model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(seq_length, 1)))
model.add(Dense(future_steps))
model.compile(optimizer='adam', loss='mse')

model.fit(X, y, epochs=100, batch_size=1, verbose=2)

# Predict for the next 5 quarters
future_quarters = 5
future_sequence = df_scaled[-seq_length:]
future_sequence = future_sequence.reshape((1, seq_length, 1))

future_prediction = model.predict(future_sequence)

# Inverse transform the prediction
future_prediction = scaler.inverse_transform(future_prediction)
print("Predicted Production for the next 5 quarters:", future_prediction[0])


  df.iloc[:, 0] = pd.to_datetime(df.iloc[:, 0])


Epoch 1/100
73/73 - 2s - loss: 0.2280 - 2s/epoch - 21ms/step
Epoch 2/100
73/73 - 0s - loss: 0.0320 - 185ms/epoch - 3ms/step
Epoch 3/100
73/73 - 0s - loss: 0.0228 - 176ms/epoch - 2ms/step
Epoch 4/100
73/73 - 0s - loss: 0.0215 - 183ms/epoch - 3ms/step
Epoch 5/100
73/73 - 0s - loss: 0.0211 - 178ms/epoch - 2ms/step
Epoch 6/100
73/73 - 0s - loss: 0.0209 - 165ms/epoch - 2ms/step
Epoch 7/100
73/73 - 0s - loss: 0.0206 - 161ms/epoch - 2ms/step
Epoch 8/100
73/73 - 0s - loss: 0.0203 - 165ms/epoch - 2ms/step
Epoch 9/100
73/73 - 0s - loss: 0.0197 - 160ms/epoch - 2ms/step
Epoch 10/100
73/73 - 0s - loss: 0.0203 - 163ms/epoch - 2ms/step
Epoch 11/100
73/73 - 0s - loss: 0.0192 - 163ms/epoch - 2ms/step
Epoch 12/100
73/73 - 0s - loss: 0.0190 - 163ms/epoch - 2ms/step
Epoch 13/100
73/73 - 0s - loss: 0.0193 - 165ms/epoch - 2ms/step
Epoch 14/100
73/73 - 0s - loss: 0.0180 - 164ms/epoch - 2ms/step
Epoch 15/100
73/73 - 0s - loss: 0.0175 - 164ms/epoch - 2ms/step
Epoch 16/100
73/73 - 0s - loss: 0.0176 - 168ms/epoc

In [91]:
from datetime import datetime
from datetime import datetime
from dateutil.relativedelta import relativedelta

In [102]:
class TimeSeriesPredictor:
    def __init__(self, file, selected_column, seq_length=3,  epochs=100, batch_size=1):
        self.file = file
        self.selected_column = selected_column-1
        self.seq_length = seq_length
        self.epochs = epochs
        self.batch_size = batch_size
        self.df = None
        self.scaler = MinMaxScaler()
        self.model = None

    def preprocess_data(self):
        # Read CSV file and preprocess data
        self.df = pd.read_csv(self.file)
        self.df.iloc[:, 0] = pd.to_datetime(self.df.iloc[:, 0])

        for i in range(1, 5):
            if self.df.iloc[:, i].dtype == 'object':
                self.df.iloc[:, i] = self.df.iloc[:, i].str.replace(',', '')
            self.df.iloc[:, i] = self.df.iloc[:, i].astype(float)

        self.df.set_index(self.df.columns[0], inplace=True)

        selected_data = self.df[self.df.columns[self.selected_column]].values.reshape(-1, 1)
        self.df_scaled = self.scaler.fit_transform(selected_data)

    def create_sequences(self, future_steps):
        # Create sequences and labels for predicting future steps
        sequences = []
        labels = []

        for i in range(len(self.df_scaled) - self.seq_length - future_steps + 1):
            seq = self.df_scaled[i:i + self.seq_length]
            label = self.df_scaled[i + self.seq_length:i + self.seq_length + future_steps]
            sequences.append(seq)
            labels.append(label)

        self.X = np.array(sequences)
        self.y = np.array(labels)

        # Reshape for LSTM input (samples, time steps, features)
        self.X = self.X.reshape((self.X.shape[0], self.X.shape[1], 1))

    def build_model(self, future_steps):
        # Define and train the LSTM model
        self.model = Sequential()
        self.model.add(LSTM(50, activation='relu', input_shape=(self.seq_length, 1)))
        self.model.add(Dense(future_steps))
        self.model.compile(optimizer='adam', loss='mse')

        self.model.fit(self.X, self.y, epochs=self.epochs, batch_size=self.batch_size, verbose=2)

    def predict_future(self):
        # Predict for future steps
        future_sequence = self.df_scaled[-self.seq_length:]
        future_sequence = future_sequence.reshape((1, self.seq_length, 1))

        future_prediction = self.model.predict(future_sequence)

        # Inverse transform the prediction
        future_prediction = self.scaler.inverse_transform(future_prediction)
        return future_prediction[0][-1]
    
    def convert_to_datetime(self, year, quarter):
        # Map quarter to the corresponding month
        year = int(year)
        quarter = int(quarter[-1])
        month = (quarter - 1) * 3 + 1
        # Create a datetime object for the first day of the quarter
        datetime_object = datetime(year, month, 1, 0, 0, 0)
        
        return datetime_object
    
    def calculate_month_difference(self, datetime1, datetime2):
        difference = relativedelta(datetime2, datetime1)
        months_difference = difference.years * 12 + difference.months
        return months_difference
    
    def forecast_data(self, year, quarter):
        datetime2 = self.convert_to_datetime(year, quarter)
        result = int(self.calculate_month_difference(self.df.index[-1], datetime2)/3)
        self.create_sequences(result)
        self.build_model(result)
        value = self.predict_future()

        present_value = self.df[self.df.columns[self.selected_column]][-1]
        perc_change = ((value - present_value) / present_value) * 100
        if perc_change > 0:
            perc_change = f"+{perc_change:.2f}%"
        else:
            perc_change = f"{perc_change:.2f}%"
        return value.round(3), perc_change
    
    def process_dataset(self, file):
        df = pd.read_csv(file)
        df.iloc[:, 0] = pd.to_datetime(df.iloc[:, 0])
        for i in range(1, 5):
            if df.iloc[:, i].dtype == 'object':
                df.iloc[:, i] = df.iloc[:, i].str.replace(',', '')
            df.iloc[:, i] = df.iloc[:, i].astype(float)
        df.set_index(df.columns[0], inplace=True)
        return df


# Example usage
file_path = file
selected_col = 4
predictor = TimeSeriesPredictor(file=file_path, selected_column=selected_col)
predictor.preprocess_data()
future_predictions = predictor.forecast_data('2024', 'Q1')
print("Predicted Production for the next 5 quarters:", future_predictions)


  self.df.iloc[:, 0] = pd.to_datetime(self.df.iloc[:, 0])


Epoch 1/100
73/73 - 2s - loss: 0.1937 - 2s/epoch - 29ms/step
Epoch 2/100
73/73 - 0s - loss: 0.0218 - 198ms/epoch - 3ms/step
Epoch 3/100
73/73 - 0s - loss: 0.0114 - 198ms/epoch - 3ms/step
Epoch 4/100
73/73 - 0s - loss: 0.0066 - 202ms/epoch - 3ms/step
Epoch 5/100
73/73 - 0s - loss: 0.0051 - 199ms/epoch - 3ms/step
Epoch 6/100
73/73 - 0s - loss: 0.0048 - 211ms/epoch - 3ms/step
Epoch 7/100
73/73 - 0s - loss: 0.0051 - 204ms/epoch - 3ms/step
Epoch 8/100
73/73 - 0s - loss: 0.0044 - 206ms/epoch - 3ms/step
Epoch 9/100
73/73 - 0s - loss: 0.0045 - 202ms/epoch - 3ms/step
Epoch 10/100
73/73 - 0s - loss: 0.0049 - 202ms/epoch - 3ms/step
Epoch 11/100
73/73 - 0s - loss: 0.0047 - 197ms/epoch - 3ms/step
Epoch 12/100
73/73 - 0s - loss: 0.0045 - 191ms/epoch - 3ms/step
Epoch 13/100
73/73 - 0s - loss: 0.0046 - 200ms/epoch - 3ms/step
Epoch 14/100
73/73 - 0s - loss: 0.0046 - 203ms/epoch - 3ms/step
Epoch 15/100
73/73 - 0s - loss: 0.0044 - 216ms/epoch - 3ms/step
Epoch 16/100
73/73 - 0s - loss: 0.0045 - 211ms/epoc

  present_value = self.df[self.df.columns[self.selected_column]][-1]


In [100]:
df3 = predictor.preprocess_data()

  self.df.iloc[:, 0] = pd.to_datetime(self.df.iloc[:, 0])


In [101]:
df3.head()

Unnamed: 0_level_0,Total Agricultural Production (in million tonnes),Total Agricultural Revenues (in billion INR),Employment (in million people),GDP Contribution Percentage from Agriculture
Quarterly Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-01-01,92.17,1610.2,54.2,18.4
2003-04-01,94.55,1646.8,54.3,18.2
2003-07-01,81.16,1485.0,54.4,17.9
2003-10-01,86.77,1561.1,54.5,18.1
2004-01-01,95.21,1697.5,54.6,18.3


## try rnn

In [103]:
df = pd.read_csv(file)
df.iloc[:, 0] = pd.to_datetime(df.iloc[:, 0])
for i in range(1, 5):
    if df.iloc[:, i].dtype == 'object':
        df.iloc[:, i] = df.iloc[:, i].str.replace(',', '')
    df.iloc[:, i] = df.iloc[:, i].astype(float)
df.set_index(df.columns[0], inplace=True)
df.head()


  df.iloc[:, 0] = pd.to_datetime(df.iloc[:, 0])


Unnamed: 0_level_0,Total Agricultural Production (in million tonnes),Total Agricultural Revenues (in billion INR),Employment (in million people),GDP Contribution Percentage from Agriculture
Quarterly Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2003-01-01,92.17,1610.2,54.2,18.4
2003-04-01,94.55,1646.8,54.3,18.2
2003-07-01,81.16,1485.0,54.4,17.9
2003-10-01,86.77,1561.1,54.5,18.1
2004-01-01,95.21,1697.5,54.6,18.3


In [104]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
import matplotlib.pyplot as plt

In [None]:



# Normalize the data
scaler = MinMaxScaler()
df['Production'] = scaler.fit_transform(df[['Production']])

# Prepare data for RNN
def create_sequences(data, seq_length):
    sequences = []
    targets = []

    for i in range(len(data) - seq_length):
        seq = data.iloc[i:i+seq_length]['Production'].values
        label = data.iloc[i+seq_length]['Production']
        sequences.append(seq)
        targets.append(label)

    return np.array(sequences), np.array(targets)

# Set sequence length
sequence_length = 2

# Create sequences
X, y = create_sequences(df, sequence_length)

# Reshape input data for RNN
X = X.reshape((X.shape[0], X.shape[1], 1))

# Build the RNN model
model = Sequential()
model.add(SimpleRNN(50, activation='relu', input_shape=(sequence_length, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model.fit(X, y, epochs=100, verbose=1)

# Make predictions
future_steps = 3  # Number of future steps to predict
test_input = df[-sequence_length:].values.reshape((1, sequence_length, 1))
predictions = []

for _ in range(future_steps):
    pred = model.predict(test_input)
    predictions.append(pred)
    test_input = np.append(test_input[:, 1:, :], [[pred]], axis=1)

# Inverse transform the predictions to the original scale
predictions = scaler.inverse_transform(predictions)

# Plot the results
plt.plot(df.index, df['Production'], label='Original Data')
plt.plot(pd.date_range(start=df.index[-1], periods=future_steps+1, freq='Q'), predictions, label='Future Predictions', linestyle='dashed')
plt.xlabel('QuarterlyYear')
plt.ylabel('Production')
plt.legend()
plt.show()


In [105]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense

class RNNPredictor:
    def __init__(self, file, selected_column, seq_length=3, epochs=100, batch_size=1):
        self.file = file
        self.selected_column = selected_column - 1
        self.seq_length = seq_length
        self.epochs = epochs
        self.batch_size = batch_size
        self.df = None
        self.scaler = MinMaxScaler()
        self.model = None

    def preprocess_data(self):
        # Read CSV file and preprocess data
        self.df = pd.read_csv(self.file)
        self.df.iloc[:, 0] = pd.to_datetime(self.df.iloc[:, 0])

        for i in range(1, 5):
            if self.df.iloc[:, i].dtype == 'object':
                self.df.iloc[:, i] = self.df.iloc[:, i].str.replace(',', '')
            self.df.iloc[:, i] = self.df.iloc[:, i].astype(float)

        self.df.set_index(self.df.columns[0], inplace=True)

        selected_data = self.df[self.df.columns[self.selected_column]].values.reshape(-1, 1)
        self.df_scaled = self.scaler.fit_transform(selected_data)

    def create_sequences(self, future_steps):
        # Create sequences and labels for predicting future steps
        sequences = []
        labels = []

        for i in range(len(self.df_scaled) - self.seq_length - future_steps + 1):
            seq = self.df_scaled[i:i + self.seq_length]
            label = self.df_scaled[i + self.seq_length:i + self.seq_length + future_steps]
            sequences.append(seq)
            labels.append(label)

        self.X = np.array(sequences)
        self.y = np.array(labels)

        # Reshape for RNN input (samples, time steps, features)
        self.X = self.X.reshape((self.X.shape[0], self.X.shape[1], 1))

    def build_model(self, future_steps):
        # Define and train the RNN model
        self.model = Sequential()
        self.model.add(SimpleRNN(50, activation='relu', input_shape=(self.seq_length, 1)))
        self.model.add(Dense(future_steps))
        self.model.compile(optimizer='adam', loss='mse')

        self.model.fit(self.X, self.y, epochs=self.epochs, batch_size=self.batch_size, verbose=2)

    def predict_future(self):
        # Predict for future steps
        future_sequence = self.df_scaled[-self.seq_length:]
        future_sequence = future_sequence.reshape((1, self.seq_length, 1))

        future_prediction = self.model.predict(future_sequence)

        # Inverse transform the prediction
        future_prediction = self.scaler.inverse_transform(future_prediction)
        return future_prediction[0][-1]

    def forecast_data(self, year, quarter):
        datetime2 = self.convert_to_datetime(year, quarter)
        result = int(self.calculate_month_difference(self.df.index[-1], datetime2) / 3)
        self.create_sequences(result)
        self.build_model(result)
        value = self.predict_future()

        present_value = self.df[self.df.columns[self.selected_column]][-1]
        perc_change = ((value - present_value) / present_value) * 100
        if perc_change > 0:
            perc_change = f"+{perc_change:.2f}%"
        else:
            perc_change = f"{perc_change:.2f}%"
        return value.round(3), perc_change

    def convert_to_datetime(self, year, quarter):
        # Map quarter to the corresponding month
        year = int(year)
        quarter = int(quarter[-1])
        month = (quarter - 1) * 3 + 1
        # Create a datetime object for the first day of the quarter
        datetime_object = datetime(year, month, 1, 0, 0, 0)

        return datetime_object

    def calculate_month_difference(self, datetime1, datetime2):
        difference = relativedelta(datetime2, datetime1)
        months_difference = difference.years * 12 + difference.months
        return months_difference

    def process_dataset(self, file):
        df = pd.read_csv(file)
        df.iloc[:, 0] = pd.to_datetime(df.iloc[:, 0])
        for i in range(1, 5):
            if df.iloc[:, i].dtype == 'object':
                df.iloc[:, i] = df.iloc[:, i].str.replace(',', '')
            df.iloc[:, i] = df.iloc[:, i].astype(float)
        df.set_index(df.columns[0], inplace=True)
        return df


In [106]:
file_path = file
selected_col = 1
predictor = RNNPredictor(file=file_path, selected_column=selected_col)
predictor.preprocess_data()
future_predictions = predictor.forecast_data('2024', 'Q1')
print("Predicted Production:", future_predictions)

  self.df.iloc[:, 0] = pd.to_datetime(self.df.iloc[:, 0])


Epoch 1/100
73/73 - 2s - loss: 0.1180 - 2s/epoch - 24ms/step
Epoch 2/100
73/73 - 0s - loss: 0.0177 - 183ms/epoch - 3ms/step
Epoch 3/100
73/73 - 0s - loss: 0.0147 - 187ms/epoch - 3ms/step
Epoch 4/100
73/73 - 0s - loss: 0.0136 - 188ms/epoch - 3ms/step
Epoch 5/100
73/73 - 0s - loss: 0.0131 - 188ms/epoch - 3ms/step
Epoch 6/100
73/73 - 0s - loss: 0.0119 - 187ms/epoch - 3ms/step
Epoch 7/100
73/73 - 0s - loss: 0.0117 - 192ms/epoch - 3ms/step
Epoch 8/100
73/73 - 0s - loss: 0.0109 - 189ms/epoch - 3ms/step
Epoch 9/100
73/73 - 0s - loss: 0.0113 - 193ms/epoch - 3ms/step
Epoch 10/100
73/73 - 0s - loss: 0.0101 - 185ms/epoch - 3ms/step
Epoch 11/100
73/73 - 0s - loss: 0.0102 - 192ms/epoch - 3ms/step
Epoch 12/100
73/73 - 0s - loss: 0.0110 - 205ms/epoch - 3ms/step
Epoch 13/100
73/73 - 0s - loss: 0.0111 - 195ms/epoch - 3ms/step
Epoch 14/100
73/73 - 0s - loss: 0.0096 - 199ms/epoch - 3ms/step
Epoch 15/100
73/73 - 0s - loss: 0.0102 - 203ms/epoch - 3ms/step
Epoch 16/100
73/73 - 0s - loss: 0.0100 - 204ms/epoc

  present_value = self.df[self.df.columns[self.selected_column]][-1]
