In [6]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
import matplotlib.pyplot as plt
import pickle

# Read the raw table from disk
df = pd.read_csv('Human trafficking 15-20.csv')

# The first column identifies the state; keep each distinct value once
states = pd.unique(df.iloc[:, 0])

# Per-state containers, keyed by state name:
#   models      -> fitted model results
#   predictions -> forecasts produced by each model
#   accuracy    -> accuracy score of each forecast
models, predictions, accuracy = {}, {}, {}
# Per-state score that is printed inside the loop. Despite its name this holds
# the accuracy ratio computed below, not a root-mean-squared error; the name is
# kept because the rest of this cell refers to it.
state_rmse = {}

# Loop over each state: fit a separate SARIMA model, forecast, score, persist
for state in states:
    # Row(s) for the current state without the state-name column, squeezed
    # down to a 1-D series of observations
    data = df[df.iloc[:, 0] == state].iloc[:, 1:].squeeze()

    # Hold out the last two observations for evaluation
    train_data = data[:-2]
    test_data = data[-2:]

    # Train the SARIMA model
    # NOTE(review): a seasonal period of 12 is normally used for monthly data;
    # the file name suggests one observation per year (2015-20) — confirm that
    # this seasonal specification is intended for such a short series.
    model = SARIMAX(train_data, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
    model_fit = model.fit()

    # Forecast 5 steps past the end of the training window; the first two
    # steps line up with the held-out observations
    predictions[state] = model_fit.forecast(steps=5)

    # Store the trained model
    models[state] = model_fit

    # Score the forecast using only values produced in this iteration.
    # accuracy = 1 - (absolute error on the held-out points / sum of all
    # observed values). The training points contribute no error, so this is
    # the same quantity the later cells obtain from the padded series.
    observed = np.asarray(data, dtype=float)
    held_out = np.asarray(test_data, dtype=float)
    # Convert to a plain array so slicing is positional regardless of the
    # index statsmodels attaches to the forecast
    forecast = np.asarray(predictions[state], dtype=float)[:len(held_out)]
    total_cases = observed.sum()
    if total_cases:
        accuracy[state] = 1 - np.abs(held_out - forecast).sum() / total_cases
    else:
        # No recorded cases at all: the ratio is undefined
        accuracy[state] = np.nan
    state_rmse[state] = accuracy[state]

    # Print the accuracy score for this state
    print(f"{state}: {state_rmse[state]}")

    # Save the model for the current state
    model_filename = f'{state}_model.pkl'
    with open(model_filename, 'wb') as file:
        pickle.dump(model_fit, file)

# Plot the actual and predicted values for each state
# presumably the first data column is 2015 (the file name says 15-20) — confirm
first_year = 2015

for state in states:
    # Observed values for the current state as a plain NumPy array
    actual_values = df[df.iloc[:, 0] == state].iloc[:, 1:].squeeze().values

    # Work on a plain array: indexing a pandas Series with -1 is a label
    # lookup on an integer index and is not reliably "the last element"
    forecast = np.asarray(predictions[state])

    # Predicted curve = training observations, then the forecast, then the
    # final forecast value repeated twice as padding
    predicted_values = np.concatenate([actual_values[:-2], forecast, np.repeat(forecast[-1], 2)])

    # Create a plot for the current state
    plt.figure(figsize=(8, 4))
    plt.plot(actual_values, label='Actual Values')
    plt.plot(predicted_values, label='Predicted Values')

    # One tick per plotted point of the longer (predicted) series, so the
    # number of tick positions always equals the number of year labels
    tick_positions = np.arange(len(predicted_values))
    plt.xticks(tick_positions, first_year + tick_positions, rotation=45)
    plt.xlabel('Year')
    plt.ylabel('Number of Cases')
    plt.title(state)
    plt.legend()
    plt.show()


ModuleNotFoundError: No module named 'statsmodels'

In [None]:
# Create a dictionary to store the accuracy (100 - MAPE, in percent) for each state
accuracy = {}

# Loop over each state
for state in states:
    # Observed values for the current state as a plain NumPy array
    actual_values = df[df.iloc[:, 0] == state].iloc[:, 1:].squeeze().values

    # Work on a plain array: indexing a pandas Series with -1 is a label
    # lookup on an integer index and is not reliably "the last element"
    forecast = np.asarray(predictions[state])
    predicted_values = np.concatenate([actual_values[:-2], forecast, np.repeat(forecast[-1], 2)])

    # Compare over exactly the observed window instead of a hard-coded length
    n_obs = len(actual_values)
    abs_error = np.abs(actual_values - predicted_values[:n_obs])

    # Calculate the mean absolute percentage error (MAPE) for the current state.
    # Observations with zero actual cases have no defined percentage error, so
    # they are left out rather than turning the whole score into inf/nan.
    # NOTE(review): all but the last two compared points are copies of the
    # actual values, so they contribute zero error by construction.
    has_cases = actual_values != 0
    if has_cases.any():
        mape = np.mean(abs_error[has_cases] / np.abs(actual_values[has_cases])) * 100
    else:
        mape = np.nan

    # Store the accuracy for the current state
    accuracy[state] = 100 - mape

# Print the accuracy for each state
for state, acc in accuracy.items():
    print(state, ':', acc, '%')

In [None]:
# Create a dictionary to store the mean absolute error (MAE) for each state
mae_dict = {}

# Loop over each state
for state in states:
    # Observed values for the current state as a plain NumPy array
    actual_values = df[df.iloc[:, 0] == state].iloc[:, 1:].squeeze().values

    # Work on a plain array: indexing a pandas Series with -1 is a label
    # lookup on an integer index and is not reliably "the last element"
    forecast = np.asarray(predictions[state])
    predicted_values = np.concatenate([actual_values[:-2], forecast, np.repeat(forecast[-1], 2)])

    # Calculate the MAE over exactly the observed window instead of a
    # hard-coded length.
    # NOTE(review): all but the last two compared points are copies of the
    # actual values, so they contribute zero error by construction.
    n_obs = len(actual_values)
    mae = np.mean(np.abs(predicted_values[:n_obs] - actual_values))
    mae_dict[state] = mae

# Print the MAE for each state
for state, mae in mae_dict.items():
    print(f"MAE for {state}: {mae}")
