### Prediction Analysis

The following are two models for price and sales prediction, built from Price_table.csv and Sales_table.csv. Each model includes a data-cleansing step followed by a prediction step.

### Price Prediction Model

In [None]:
import pandas as pd
import numpy as np

# Modelling assumptions used by both the cleansing and the projection step.
alpha = 0.3               # EMA smoothing factor
INFLATION_FACTOR = 1.04   # assumed 4% price growth per year
FILL_THROUGH_YEAR = 2021  # every model gets price rows up to this year
PROJECTION_YEARS = 5      # number of years projected past the last known year


def fill_missing_years(prices, through_year=FILL_THROUGH_YEAR,
                       growth=INFLATION_FACTOR):
    """Return `prices` extended so every Genmodel_ID has rows up to `through_year`.

    For each Genmodel_ID the price of its latest year is carried forward,
    multiplied by `growth` once per missing year. Groups that already reach
    `through_year` are left alone. The input frame is not modified.

    Expects the columns Maker, Genmodel, Genmodel_ID, Year and Entry_price.
    """
    new_rows = []
    for genmodel_id, group in prices.groupby('Genmodel_ID'):
        last_year = int(group['Year'].max())
        price = group.loc[group['Year'] == last_year, 'Entry_price'].iloc[0]
        maker = group['Maker'].iloc[0]
        genmodel = group['Genmodel'].iloc[0]

        for year in range(last_year + 1, through_year + 1):
            price = price * growth
            new_rows.append({
                'Maker': maker,
                'Genmodel': genmodel,
                'Genmodel_ID': genmodel_id,
                'Year': year,
                'Entry_price': price,
            })

    if not new_rows:
        return prices
    # Concatenate once: appending inside the loop copies the whole frame
    # for every added row.
    return pd.concat([prices, pd.DataFrame(new_rows)], ignore_index=True)


def add_ema_projection(prices, alpha=0.3, horizon=PROJECTION_YEARS,
                       growth=INFLATION_FACTOR):
    """Return `prices` with an EMA column plus `horizon` projected years per model.

    The EMA (recursive form, ``adjust=False``) is computed per Genmodel_ID
    over Entry_price in chronological order and stored on *every* existing
    row, so the historical smoothed values are kept alongside the projection.

    Each projected year assumes the next price is the current EMA times
    `growth` and folds that price into the EMA, i.e. the EMA grows by a
    factor of ``1 + alpha * (growth - 1)`` per year. Projected rows carry
    NaN in Entry_price because no price was observed.
    """
    prices = prices.reset_index(drop=True)  # unique labels for alignment below

    # Stable sort keeps the original order of rows that share a year.
    chronological = prices.sort_values('Year', kind='stable')
    ema = (
        chronological['Entry_price']
        .astype(float)
        .groupby(chronological['Genmodel_ID'])
        .transform(lambda s: s.ewm(alpha=alpha, adjust=False).mean())
    )
    prices['EMA'] = ema  # aligned on the index, so row order is unchanged

    projected_rows = []
    for genmodel_id, group in prices.groupby('Genmodel_ID'):
        last_year = int(group['Year'].max())
        level = group.loc[group['Year'] == last_year, 'EMA'].iloc[0]
        maker = group['Maker'].iloc[0]
        genmodel = group['Genmodel'].iloc[0]

        for year in range(last_year + 1, last_year + 1 + horizon):
            assumed_price = level * growth
            level = alpha * assumed_price + (1 - alpha) * level
            projected_rows.append({
                'Maker': maker,
                'Genmodel': genmodel,
                'Genmodel_ID': genmodel_id,
                'Year': year,
                'Entry_price': np.nan,
                'EMA': level,
            })

    if not projected_rows:
        return prices
    return pd.concat([prices, pd.DataFrame(projected_rows)], ignore_index=True)


# The guard is true when this cell runs in a notebook or as a script; it only
# keeps the file I/O from running when the helpers are imported elsewhere.
if __name__ == "__main__":
    # Load the data
    data = pd.read_csv('Price_table.csv')

    # Data cleansing: make sure each model has price data up to 2021
    data = fill_missing_years(data)

    # Smooth the prices and project the upcoming 5 years
    data = add_ema_projection(data, alpha=alpha)

    # One row per model, one column per year, holding the EMA
    pivot_data = data.pivot_table(
        index=['Maker', 'Genmodel', 'Genmodel_ID'],
        columns='Year',
        values='EMA',
        aggfunc='first',
    )

    # Turn the index levels back into regular columns
    pivot_data.reset_index(inplace=True)

    # Save the updated data to a new CSV file
    pivot_data.to_csv('Projected_Price_data.csv', index=False)

    # Group data again after adding projected years
    grouped_data = data.groupby('Genmodel_ID')



### Sales Prediction Model

In [None]:
import pandas as pd
import numpy as np

# Modelling assumptions for the sales projection.
alpha = 0.3          # EMA smoothing factor
projected_years = 6  # number of years projected past the last observed year


def sort_year_columns(sales, id_columns=3):
    """Return `sales` with its year columns ordered newest to oldest.

    The first `id_columns` columns (Maker, Genmodel, Genmodel_ID in the
    sales table) stay in place; every remaining column name must be
    parseable as an integer year.
    """
    identifiers = list(sales.columns[:id_columns])
    years_desc = sorted(sales.columns[id_columns:], key=int, reverse=True)
    return sales[identifiers + years_desc]


def project_sales(sales, alpha=0.3, horizon=6, growth=1.04,
                  start_year=None, id_columns=3):
    """Project yearly sales for every row of `sales`.

    An EMA (recursive form, ``adjust=False``) is run over each row's yearly
    sales in chronological order. Each projected year assumes next year's
    sales equal the current EMA times `growth` and folds that value into
    the EMA, i.e. the EMA grows by ``1 + alpha * (growth - 1)`` per year.

    Parameters:
        sales: frame whose first `id_columns` columns identify the model and
            whose remaining columns are named by year (any order).
        alpha: EMA smoothing factor.
        horizon: number of years to project.
        growth: assumed year-over-year growth factor.
        start_year: first projected year; defaults to the year after the
            latest year column, so no existing column is ever duplicated.
        id_columns: number of leading non-year columns.

    Returns:
        A frame indexed like `sales` with one column per projected year
        (string names, ascending).

    Raises:
        ValueError: if `sales` has no year columns.
    """
    year_columns = sorted(sales.columns[id_columns:], key=int)  # oldest first
    if not year_columns:
        raise ValueError('sales has no year columns to project from')
    if start_year is None:
        start_year = int(year_columns[-1]) + 1

    history = sales[year_columns].to_numpy(dtype=float)
    # ewm works down the rows of a frame, so put years on the row axis and
    # keep only the final smoothed value of each model.
    level = (
        pd.DataFrame(history.T)
        .ewm(alpha=alpha, adjust=False)
        .mean()
        .iloc[-1]
        .to_numpy()
    )

    projections = {}
    for year in range(start_year, start_year + horizon):
        assumed_sales = level * growth
        level = alpha * assumed_sales + (1 - alpha) * level
        projections[str(year)] = level

    # Reuse the input index so a column-wise concat lines up row for row.
    return pd.DataFrame(projections, index=sales.index)


# The guard is true when this cell runs in a notebook or as a script; it only
# keeps the file I/O from running when the helpers are imported elsewhere.
if __name__ == "__main__":
    # Load the sales data
    data = pd.read_csv('Sales_table.csv')

    # Data cleansing: keep the identifier columns first, years newest to oldest
    data = sort_year_columns(data)

    # Smooth the history with an EMA and predict the following years
    projected_sales_df = project_sales(data, alpha=alpha, horizon=projected_years)

    # Combine the projected sales with the original data
    combined_df = pd.concat([data, projected_sales_df], axis=1)

    # Save the updated data to a new CSV file
    combined_df.to_csv('Projected_Sales_data.csv', index=False)

    # Display the combined data
    print(combined_df)


      Maker      Genmodel Genmodel_ID   2020   2019   2018   2017   2016  \
0    ABARTH    ABARTH 124         2_1      0     19     27     60      0   
1    ABARTH    ABARTH 500         2_2      0      0      1      2     66   
2    ABARTH    ABARTH 595         2_4   2144   2866   3907   3295   3132   
3    ABARTH    ABARTH 695         2_6     45     65    270    114     29   
4    ABARTH  ABARTH PUNTO         2_9      0      0      0      0      0   
..      ...           ...         ...    ...    ...    ...    ...    ...   
768   VOLVO    VOLVO XC40       96_20  24281  14894   6616      8      0   
769   VOLVO    VOLVO XC60       96_16   7694  11182  10840  14994  14808   
770   VOLVO    VOLVO XC70       96_17      0      0      0     17   1006   
771   VOLVO    VOLVO XC90       96_18   4969   7495   6475   5564   5254   
772   ZENOS     ZENOS E10        99_1      0      0      0      2     10   

      2015  2014  ...  2004  2003  2002  2001          2021          2022  \
0        0