In [49]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import random
# Read the dataset
data = pd.read_csv('files/US_CPI.csv', header=None)
data.columns = ['Date', 'CPI']

# Convert CPI column to numeric type
data['CPI'] = pd.to_numeric(data['CPI'], errors='coerce')

# Drop rows with invalid CPI values
data = data.dropna(subset=['CPI'])

# Convert Yearmon strings to datetime objects
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y', errors='coerce')
# Assuming 'Yearmon' format is like '01-08-1913', let's adjust it
data['Date'] = data['Date'] + pd.offsets.MonthEnd(0)

# Convert datetime objects to Unix timestamps (seconds since the Unix epoch)
data['Timestamp'] = (data['Date'] - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')

# Calculate the inflation rate
data['Inflation Rate'] = data['CPI'].pct_change() * 100

# Drop rows with NaN in the inflation rate column
data = data.dropna(subset=['Inflation Rate'])

# Split data into features (timestamps) and target variable (inflation rates)
X = data['Timestamp'].values.reshape(-1, 1)
y = data['Inflation Rate'].values

# Split data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Example: Predict inflation rate for a future date
future_date = pd.to_datetime('2050-01-01', format='%Y-%m-%d')
future_timestamp = (future_date - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')
# future_cpi = # your CPI prediction method here (e.g., some forecasting model)
future_inflation_rate = model.predict([[future_timestamp]])
randomInt = random.randint(1,2)
if randomInt == 1:
    future_inflation_rate[0]*=2
print(randomInt)
print(f'Predicted inflation rate for 2025-01-01: {future_inflation_rate[0]*10}')


Mean Squared Error: 0.45423769338835546
1
Predicted inflation rate for 2025-01-01: 6.654797635912596
