In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# read the encoded food price table from the processed-data folder
dataset = pd.read_csv(filepath_or_buffer='../../data/processed/food_prices_encoded.csv')

In [3]:
# peek at the first five rows to confirm the columns loaded as expected
dataset.head(n=5)

Unnamed: 0,year,month,price(MZN),province_Cabo_Delgado,province_Gaza,province_Inhambane,province_Manica,province_Maputo,province_Maputo City,province_Nampula,...,commodity_Rice (local)_KG,commodity_Rice_25 KG,commodity_Rice_KG,commodity_Salt (iodised)_KG,"commodity_Sugar (brown, imported)_KG","commodity_Sugar (brown, local)_KG",commodity_Sugar_KG,commodity_Sweet potatoes_KG,commodity_Tomatoes_KG,commodity_Wheat flour (local)_KG
0,2013,7,10.0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2013,8,10.0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2013,9,10.0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2013,12,16.67,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2000,2,1.14,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# the price column is the regression target; every other column is a predictor
y = dataset['price(MZN)']
X = dataset.drop(columns='price(MZN)')

# hold out 20% of the rows for evaluation (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# fit a linear regression on the training split (fit() returns the estimator itself)
model = LinearRegression().fit(X_train, y_train)
model

In [6]:
# predict prices for the held-out rows so they can be scored against y_test
y_pred = model.predict(X=X_test)

In [7]:
# score the held-out predictions against the true prices
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# report both metrics, one per line
print(
    f"Mean Squared Error (MSE): {mse}\n"
    f"Determination coefficient (R²): {r2}"
)

Mean Squared Error (MSE): 635.5838272134666
Determination coefficient (R²): 0.9761011736558604


In [8]:
commodity_data = {} # dictionary to store the values entered during the latest call


def _prompt_for_int(column):
    """Ask for the value of ``column`` until the reply parses as an integer."""
    while True:
        reply = input(f"Enter the value for '{column}': ")
        try:
            return int(reply)
        except ValueError:
            # one typo should not throw away every value entered so far
            print(f"'{reply}' is not a whole number, please try again.")


# function to predict the price based on entered data
def predict_food_price(model, feature_columns):
    """Interactively collect one integer per feature and return the predicted price.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(DataFrame)`` that returns an indexable
        sequence of predictions (only the first element is used).
    feature_columns : iterable of str
        Column names to prompt for, in the order they are passed to the model.

    Returns
    -------
    The first element of ``model.predict(...)`` for the single entered row.

    Raises
    ------
    ValueError
        If ``feature_columns`` is empty.

    Side effects
    ------------
    The module-level ``commodity_data`` dict is replaced *in place* with the
    values entered during this call, so it can still be inspected afterwards.
    """
    feature_columns = list(feature_columns)
    if not feature_columns:
        raise ValueError("feature_columns must contain at least one column name")

    # build a fresh mapping per call so keys left over from an earlier run
    # (possibly with a different feature list) never reach the model
    entered = {column: [_prompt_for_int(column)] for column in feature_columns}

    # keep the shared dict in sync without rebinding it, so existing references stay valid
    commodity_data.clear()
    commodity_data.update(entered)

    # single-row frame; dict insertion order gives the columns in prompt order
    commodity_df = pd.DataFrame(entered)

    # make the prediction based on the commodity's data
    predicted_price = model.predict(commodity_df)

    return predicted_price[0]

# feature names: every dataset column except the target 'price(MZN)'
feature_columns = [name for name in dataset.columns if name != 'price(MZN)']

# collect the inputs interactively, then report the resulting estimate
predicted_price = predict_food_price(model, feature_columns)
print(f"Predicted Price: {predicted_price}")


Enter the value for 'year':  2023
Enter the value for 'month':  1
Enter the value for 'province_Cabo_Delgado':  1
Enter the value for 'province_Gaza':  0
Enter the value for 'province_Inhambane':  0
Enter the value for 'province_Manica':  0
Enter the value for 'province_Maputo':  0
Enter the value for 'province_Maputo City':  0
Enter the value for 'province_Nampula':  0
Enter the value for 'province_Niassa':  0
Enter the value for 'province_Sofala':  0
Enter the value for 'province_Tete':  0
Enter the value for 'province_Zambezia':  0
Enter the value for 'category_cereals and tubers':  0
Enter the value for 'category_meat, fish and eggs':  0
Enter the value for 'category_miscellaneous food':  1
Enter the value for 'category_oil and fats':  0
Enter the value for 'category_pulses and nuts':  0
Enter the value for 'category_vegetables and fruits':  0
Enter the value for 'commodity_Beans (butter)_KG':  0
Enter the value for 'commodity_Beans (catarino)_KG':  0
Enter the value for 'commodity

Predicted Price: 72.099609375


In [9]:
# show the values collected by predict_food_price: a dict mapping each
# feature column name to a one-element list holding the entered integer
print(commodity_data)

{'year': [2023], 'month': [1], 'province_Cabo_Delgado': [1], 'province_Gaza': [0], 'province_Inhambane': [0], 'province_Manica': [0], 'province_Maputo': [0], 'province_Maputo City': [0], 'province_Nampula': [0], 'province_Niassa': [0], 'province_Sofala': [0], 'province_Tete': [0], 'province_Zambezia': [0], 'category_cereals and tubers': [0], 'category_meat, fish and eggs': [0], 'category_miscellaneous food': [1], 'category_oil and fats': [0], 'category_pulses and nuts': [0], 'category_vegetables and fruits': [0], 'commodity_Beans (butter)_KG': [0], 'commodity_Beans (catarino)_KG': [0], 'commodity_Beans (dry)_KG': [0], 'commodity_Beans (magnum)_KG': [0], 'commodity_Cabbage_KG': [0], 'commodity_Carrots_KG': [0], 'commodity_Cassava (dry)_KG': [0], 'commodity_Cassava flour_KG': [0], 'commodity_Cassava leaves_KG': [0], 'commodity_Coconut_Unit': [0], 'commodity_Cowpeas_KG': [0], 'commodity_Eggs_30 pcs': [0], 'commodity_Fish_500 G': [0], 'commodity_Garlic_KG': [0], 'commodity_Groundnuts (Mix)