In [1]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [91]:
visa_df = pd.read_csv("Visa_Issued(2005-2023).csv")
visa_df["Decisions"].replace(',', '', regex=True, inplace=True)
visa_df["Decisions"] = pd.to_numeric(visa_df["Decisions"])
nation_df = visa_df.groupby(["Year", "Quarter", "Nationality","Region"]).agg({"Decisions":"sum"}).reset_index()
# Pivot the DataFrame to have Nationality as rows, Quarter as columns, and Decisions as values
pivot_df = nation_df.pivot_table(index=['Nationality','Region'], columns='Quarter', values='Decisions', fill_value=0)
# Reset index to make Nationality a regular column
pivot_df = pivot_df.reset_index()
# Melt the DataFrame to convert it back to the original format
melted_df = pivot_df.melt(id_vars=['Nationality','Region'], var_name='Quarter', value_name='Decisions')
class QuarterlyDateConverter:
    def __init__(self):
        self.quarter_to_last_day = {
            'Q1': '03-31',
            'Q2': '06-30',
            'Q3': '09-30',
            'Q4': '12-31'
        }

    def convert_quarter_column(self, df, quarter_column_name, year_column_name=None):
        df_copy = df.copy()

        # Split the quarter column into "Year" and "Quarter" columns
        df_copy[['Year', 'Quarter']] = df_copy[quarter_column_name].str.split(' ', expand=True)

        # Replace the "Quarter" column with the last day of the quarter
        df_copy['Quarter'] = df_copy['Quarter'].map(self.quarter_to_last_day)
        df_copy['Date'] = df_copy['Year'] + '-' + df_copy['Quarter']
        df_copy['Date'] = pd.to_datetime(df_copy['Date'], format='%Y-%m-%d')

        # Drop the intermediate "Year" and "Quarter" columns
        if year_column_name:
            df_copy.drop([year_column_name, quarter_column_name], axis=1, inplace=True)
        else:
            df_copy.drop(['Year', quarter_column_name], axis=1, inplace=True)

        return df_copy


# Create an instance of the QuarterlyDateConverter
converter = QuarterlyDateConverter()
# Apply the conversion using the class
final_df = converter.convert_quarter_column(melted_df, 'Quarter')

# Now, result_df contains the DataFrame with the "Date" column
final_df

Unnamed: 0,Nationality,Region,Decisions,Date
0,Afghanistan,Asia Central,877,2005-03-31
1,Albania,Europe Other,2460,2005-03-31
2,Algeria,Africa North,2391,2005-03-31
3,Andorra,Europe Other,0,2005-03-31
4,Angola,Africa Sub-Saharan,787,2005-03-31
...,...,...,...,...
15544,Vietnam,Asia South East,3678,2023-03-31
15545,Virgin Islands (British),Other,0,2023-03-31
15546,Yemen,Middle East,629,2023-03-31
15547,Zambia,Africa Sub-Saharan,1066,2023-03-31


In [100]:
final_df['Nationality'].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Anguilla (British)', 'Antigua and Barbuda', 'Argentina',
       'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas, The',
       'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium',
       'Belize', 'Benin', 'Bermuda (British)', 'Bhutan', 'Bolivia',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'British overseas citizens', 'Brunei', 'Bulgaria', 'Burkina',
       'Burma', 'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde',
       'Cayman Islands (British)', 'Central African Republic', 'Chad',
       'Chile', 'China', 'Colombia', 'Comoros', 'Congo',
       'Congo (Democratic Republic)', 'Costa Rica', 'Croatia', 'Cuba',
       'Cyprus', 'Cyprus (Northern part of)', 'Czech Republic', 'Denmark',
       'Djibouti', 'Dominica', 'Dominican Republic', 'East Timor',
       'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea',
       'Estonia', 'Ethiopia', 'Fiji', 'Finland', '

In [92]:
# Create a dictionary to store DataFrames and time series data for each nationality
nationality_dfs = {}
nationality_time_series = {}

# Loop through unique nationalities and create DataFrames and time series data
for country, group in final_df.groupby('Nationality'):
    nationality_dfs[country] = group

    # Convert the DataFrame to a time series format
    time_series_data = group.set_index('Date')['Decisions']
    nationality_time_series[country] = time_series_data

# Now we have two dictionaries:
# 1. nationality_dfs: Keys are country names, and values are corresponding DataFrames
# 2. nationality_time_series: Keys are country names, and values are time series data

# Loop through all nationalities and create DataFrame variables dynamically
for country, time_series_data in nationality_time_series.items():
    # Create valid variable names by replacing special characters and spaces
    variable_name = country.lower().replace(' ', '_').replace('(', '').replace(')', '')

    # Assign the DataFrames and time series data to dynamically created variables
    globals()[f"{variable_name}_df"] = nationality_dfs[country]
    globals()[f"{variable_name}_time_series_data"] = pd.Series(time_series_data)

# Now, you can access all the DataFrames and time series data using the dynamically created variables
print(afghanistan_df)
print(albania_df)
print(afghanistan_time_series_data)
print(albania_time_series_data)

       Nationality        Region  Decisions       Date
0      Afghanistan  Asia Central        877 2005-03-31
213    Afghanistan  Asia Central        936 2005-06-30
426    Afghanistan  Asia Central       1069 2005-09-30
639    Afghanistan  Asia Central        817 2005-12-31
852    Afghanistan  Asia Central        948 2006-03-31
...            ...           ...        ...        ...
14484  Afghanistan  Asia Central       2027 2022-03-31
14697  Afghanistan  Asia Central       2040 2022-06-30
14910  Afghanistan  Asia Central       2653 2022-09-30
15123  Afghanistan  Asia Central       1964 2022-12-31
15336  Afghanistan  Asia Central       1526 2023-03-31

[73 rows x 4 columns]
      Nationality        Region  Decisions       Date
1         Albania  Europe Other       2460 2005-03-31
214       Albania  Europe Other       2975 2005-06-30
427       Albania  Europe Other       2861 2005-09-30
640       Albania  Europe Other       3266 2005-12-31
853       Albania  Europe Other       1837 2006

In [71]:
import pandas as pd
import statsmodels.api as sm


def sarimax_forecast(time_series_data, order, seasonal_order, nationality, forecast_steps=40):
    """
    Forecast future values using SARIMAX model.

    Parameters:
    - time_series_data: pandas Series, the input time series data
    - order: tuple, order of the non-seasonal part of the ARIMA model
    - seasonal_order: tuple, order of the seasonal part of the ARIMA model
    - nationality: str, the name of the nationality
    - forecast_steps: int, number of steps to forecast into the future

    Returns:
    - forecast_df: pandas DataFrame, containing forecasted values, lower and upper confidence intervals, dates, and nationality
    """
    # Fit SARIMAX model
    model = sm.tsa.statespace.SARIMAX(time_series_data, order=order, seasonal_order=seasonal_order, tolerance=1e-6)
    result = model.fit(disp=0)

    # Forecast the next 'forecast_steps' quarters
    forecast_result = result.get_forecast(steps=forecast_steps)

    # Extract forecasted values and confidence intervals
    forecast_values = forecast_result.predicted_mean

    # Create a dataframe to store the forecast results
    forecast_df = pd.DataFrame({
        'Nationality': [nationality] * forecast_steps,
        'Decisions': forecast_values,
    })

    # Add a column for the quarter dates
    forecast_df.reset_index(inplace=True)
    forecast_df.rename(columns={"index":"Date"}, inplace=True)
    return forecast_df


# Example usage:
# Replace 'afghanistan_time_series_data', 'order_afghanistan', 'seasonal_order_afghanistan', 'Afghanistan' with your actual data
afghanistan_forecast = sarimax_forecast(afghanistan_time_series_data, (0, 1, 3), (0, 1, 3, 4), 'Afghanistan')
#afghanistan_forecast =  pd.DataFrame(afghanistan_forecast.reset_index(inplace=True))
print(afghanistan_forecast)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


         Date  Nationality    Decisions
0  2023-06-30  Afghanistan  1726.319795
1  2023-09-30  Afghanistan  2597.255725
2  2023-12-31  Afghanistan  2134.376664
3  2024-03-31  Afghanistan  1997.681745
4  2024-06-30  Afghanistan  1984.744126
5  2024-09-30  Afghanistan  2843.996708
6  2024-12-31  Afghanistan  2327.691103
7  2025-03-31  Afghanistan  2188.037012
8  2025-06-30  Afghanistan  2177.288587
9  2025-09-30  Afghanistan  3034.722866
10 2025-12-31  Afghanistan  2516.433918
11 2026-03-31  Afghanistan  2373.436954
12 2026-06-30  Afghanistan  2365.097254
13 2026-09-30  Afghanistan  3222.994490
14 2026-12-31  Afghanistan  2705.062902
15 2027-03-31  Afghanistan  2562.065939
16 2027-06-30  Afghanistan  2553.726238
17 2027-09-30  Afghanistan  3411.623474
18 2027-12-31  Afghanistan  2893.691887
19 2028-03-31  Afghanistan  2750.694923
20 2028-06-30  Afghanistan  2742.355223
21 2028-09-30  Afghanistan  3600.252459
22 2028-12-31  Afghanistan  3082.320871
23 2029-03-31  Afghanistan  2939.323908


In [21]:
frames = [final_df, afghanistan_forecast]
result = pd.concat(frames)
result

Unnamed: 0,Nationality,Decisions,Date
0,Afghanistan,877.000000,2005-03-31
1,Albania,2460.000000,2005-03-31
2,Algeria,2391.000000,2005-03-31
3,Andorra,0.000000,2005-03-31
4,Angola,787.000000,2005-03-31
...,...,...,...
35,Afghanistan,3505.344341,2032-03-31
36,Afghanistan,3497.007187,2032-06-30
37,Afghanistan,4354.955478,2032-09-30
38,Afghanistan,3836.991020,2032-12-31


In [61]:
bestmodel_df = pd.read_csv("Parameters.csv")

In [62]:

# Remove parentheses from strings in a specific column
bestmodel_df['order'] = bestmodel_df['order'].str.replace('(', '')
bestmodel_df['order'] = bestmodel_df['order'].str.replace(')', '')
bestmodel_df['order'] = bestmodel_df['order'].apply(lambda x: tuple(map(int, x.split(','))))
bestmodel_df['seasonal_order'] = bestmodel_df['seasonal_order'].str.replace('(', '')
bestmodel_df['seasonal_order'] = bestmodel_df['seasonal_order'].str.replace(')', '')
bestmodel_df['seasonal_order'] = bestmodel_df['seasonal_order'].apply(lambda x: tuple(map(int, x.split(','))))
# Display the modified DataFrame
bestmodel_df

Unnamed: 0.1,Unnamed: 0,nationality,order,seasonal_order
0,0,Afghanistan,"(0, 1, 3)","(0, 1, 3, 4)"
1,1,Albania,"(0, 1, 3)","(2, 2, 3, 4)"
2,2,Algeria,"(0, 2, 3)","(0, 2, 3, 4)"
3,3,Andorra,"(0, 1, 3)","(0, 1, 3, 4)"
4,4,Angola,"(1, 2, 3)","(1, 2, 3, 4)"
...,...,...,...,...
208,208,Vietnam,"(0, 2, 3)","(1, 2, 3, 4)"
209,209,Virgin Islands (British),"(0, 0, 0)","(0, 0, 0, 4)"
210,210,Yemen,"(0, 1, 3)","(1, 1, 3, 4)"
211,211,Zambia,"(0, 2, 3)","(1, 2, 3, 4)"


In [72]:
import ast
import pandas as pd
import statsmodels.api as sm

# Assuming you have a DataFrame named 'best_model_df' with the structure you provided
# Replace 'best_model_df' with your actual DataFrame name

# Create a dictionary to store forecast DataFrames for each nationality
forecast_dfs = {}

# Loop through each row in the best model DataFrame
for index, row in bestmodel_df.iterrows():
    nationality = row['nationality']
    order = row['order']
    seasonal_order = row['seasonal_order']

    # Get the corresponding time series data
    time_series_data_variable_name = f"{nationality.lower().replace(' ', '_').replace('(', '').replace(')', '')}_time_series_data"

    try:
        # Try to fetch the time series data variable
        time_series_data = globals()[time_series_data_variable_name]
    except KeyError:
        # Handle the case when the variable does not exist
        print(f"Variable {time_series_data_variable_name} not found.")
        continue

    # Forecast for the current nationality
    forecast_df = sarimax_forecast(time_series_data, order, seasonal_order, nationality)

    # Store the forecast DataFrame in the dictionary
    forecast_dfs[nationality] = forecast_df

# Now, 'forecast_dfs' contains forecast DataFrames for each nationality
# Access the forecasts using 'forecast_dfs[nationality]'
# For example, to access the forecast for Algeria:
print(forecast_dfs['Algeria'])


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._

         Date Nationality      Decisions
0  2023-06-30     Algeria    6526.679753
1  2023-09-30     Algeria    7778.523360
2  2023-12-31     Algeria    9585.383010
3  2024-03-31     Algeria    9779.805108
4  2024-06-30     Algeria    9681.286045
5  2024-09-30     Algeria   11148.372031
6  2024-12-31     Algeria   13421.776187
7  2025-03-31     Algeria   13817.615667
8  2025-06-30     Algeria   13796.604762
9  2025-09-30     Algeria   15495.029420
10 2025-12-31     Algeria   18379.784733
11 2026-03-31     Algeria   18931.518099
12 2026-06-30     Algeria   19060.382673
13 2026-09-30     Algeria   21046.605320
14 2026-12-31     Algeria   24599.648888
15 2027-03-31     Algeria   25364.213238
16 2027-06-30     Algeria   25699.890391
17 2027-09-30     Algeria   28030.848125
18 2027-12-31     Algeria   32309.117046
19 2028-03-31     Algeria   33343.449477
20 2028-06-30     Algeria   33942.876307
21 2028-09-30     Algeria   36675.506226
22 2028-12-31     Algeria   41735.937598
23 2029-03-31   



In [73]:
# Loop through all nationalities and create DataFrame variables dynamically
for country, time_series_data in nationality_time_series.items():
    # Create valid variable names by replacing special characters and spaces
    variable_name = country.lower().replace(' ', '_').replace('(', '').replace(')', '')

    # Assign the DataFrames and time series data to dynamically created variables
    globals()[f"{variable_name}_df"] = nationality_dfs[country]
    globals()[f"{variable_name}_time_series_data"] = pd.Series(time_series_data)

213

In [95]:
import pandas as pd

# Assuming forecast_dfs is your dictionary of DataFrames
# Combine all DataFrames into one
combined_forecast_df = pd.concat(forecast_dfs.values(), axis=0, ignore_index=True)
combined_forecast_df

Unnamed: 0,Date,Nationality,Decisions
0,2023-06-30,Afghanistan,1726.319795
1,2023-09-30,Afghanistan,2597.255725
2,2023-12-31,Afghanistan,2134.376664
3,2024-03-31,Afghanistan,1997.681745
4,2024-06-30,Afghanistan,1984.744126
...,...,...,...
8515,2032-03-31,Zimbabwe,456198.657982
8516,2032-06-30,Zimbabwe,478841.525548
8517,2032-09-30,Zimbabwe,505381.141744
8518,2032-12-31,Zimbabwe,529230.634179


In [96]:
regions_df = final_df.drop(columns=["Date","Decisions"])
regions_df.drop_duplicates(subset='Nationality', keep='first', inplace=True)
combined_forecast_df =  pd.merge(combined_forecast_df, regions_df,on="Nationality", how="left")
combined_forecast_df

Unnamed: 0,Date,Nationality,Decisions,Region
0,2023-06-30,Afghanistan,1726.319795,Asia Central
1,2023-09-30,Afghanistan,2597.255725,Asia Central
2,2023-12-31,Afghanistan,2134.376664,Asia Central
3,2024-03-31,Afghanistan,1997.681745,Asia Central
4,2024-06-30,Afghanistan,1984.744126,Asia Central
...,...,...,...,...
8515,2032-03-31,Zimbabwe,456198.657982,Africa Sub-Saharan
8516,2032-06-30,Zimbabwe,478841.525548,Africa Sub-Saharan
8517,2032-09-30,Zimbabwe,505381.141744,Africa Sub-Saharan
8518,2032-12-31,Zimbabwe,529230.634179,Africa Sub-Saharan


In [97]:
frames = [final_df, combined_forecast_df]
final_result = pd.concat(frames)
final_result.sort_values(by=['Date'])
final_result

Unnamed: 0,Nationality,Region,Decisions,Date
0,Afghanistan,Asia Central,877.000000,2005-03-31
1,Albania,Europe Other,2460.000000,2005-03-31
2,Algeria,Africa North,2391.000000,2005-03-31
3,Andorra,Europe Other,0.000000,2005-03-31
4,Angola,Africa Sub-Saharan,787.000000,2005-03-31
...,...,...,...,...
8515,Zimbabwe,Africa Sub-Saharan,456198.657982,2032-03-31
8516,Zimbabwe,Africa Sub-Saharan,478841.525548,2032-06-30
8517,Zimbabwe,Africa Sub-Saharan,505381.141744,2032-09-30
8518,Zimbabwe,Africa Sub-Saharan,529230.634179,2032-12-31


In [99]:
import pandas as pd
from dash import Dash, dcc, html
from dash.dependencies import Input, Output
import plotly.express as px

df = final_result.copy()
df['Date'] = pd.to_datetime(df['Date'])

# Find the unique years in the dataset
years = sorted(df['Date'].dt.year.unique())

# Create a Dash web application
app = Dash(__name__)

# Define the layout of the app
app.layout = html.Div([
    dcc.Graph(id='choropleth-map'),
    dcc.Slider(
        id='year-slider',
        min=min(years),
        max=max(years),
        value=min(years),
        marks={str(year): str(year) for year in years},
        step=None
    ),
])

# Define callback to update choropleth map based on the slider value
@app.callback(
    Output('choropleth-map', 'figure'),
    [Input('year-slider', 'value')]
)
def update_choropleth(selected_year):
    filtered_data = df[df['Date'].dt.year == selected_year]

    fig = px.choropleth(
        filtered_data,
        locations='Nationality',
        locationmode='country names',
        color='Decisions',
        hover_name='Nationality',
        title=f'Decisions by Nationality ({selected_year})'
    )

    return fig

# Run the app
if __name__ == '__main__':
    app.run_server(debug=True, port=8051)


In [98]:
import pandas as pd
import plotly.express as px

# Assuming you have a DataFrame named 'final_df'
# If not, replace 'final_df' with the actual name of your dataframe

# Function to plot the data based on the selected category (Nationality or Region)
def plot_data(dataframe, category):
    fig = px.line(dataframe, x='Date', y='Decisions', color=category, labels={'Decisions': 'Decisions'},
                  title=f'Decisions over Time for Different {category.capitalize()}')

    # Add interactive labels
    fig.update_traces(hovertemplate='<b></b><br>Date: %{x}<br>Decisions: %{y}')

    # Show the plot
    fig.show()

# Plot initial data using 'Nationality'
plot_data(final_result, 'Nationality')
plot_data(final_result, 'Region')


In [90]:
print(final_result)

      Nationality      Decisions       Date
0     Afghanistan     877.000000 2005-03-31
1         Albania    2460.000000 2005-03-31
2         Algeria    2391.000000 2005-03-31
3         Andorra       0.000000 2005-03-31
4          Angola     787.000000 2005-03-31
...           ...            ...        ...
8515     Zimbabwe  456198.657982 2032-03-31
8516     Zimbabwe  478841.525548 2032-06-30
8517     Zimbabwe  505381.141744 2032-09-30
8518     Zimbabwe  529230.634179 2032-12-31
8519     Zimbabwe  554051.628917 2033-03-31

[24069 rows x 3 columns]
