# Ice-cream Sales Revenue Prediction

In [1]:
# import library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# import dataset
icecream = pd.read_csv('https://github.com/YBIFoundation/Dataset/raw/main/Ice%20Cream.csv')

In [3]:
icecream.head()

Unnamed: 0,Temperature,Revenue
0,24.6,535
1,26.1,626
2,27.8,661
3,20.6,488
4,11.6,317


In [4]:
icecream.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Temperature  500 non-null    float64
 1   Revenue      500 non-null    int64  
dtypes: float64(1), int64(1)
memory usage: 7.9 KB


In [5]:
icecream.describe()

Unnamed: 0,Temperature,Revenue
count,500.0,500.0
mean,22.2816,522.058
std,8.097597,175.410399
min,0.0,10.0
25%,17.175,406.0
50%,22.4,530.0
75%,27.8,643.0
max,45.0,1000.0


In [6]:
# define y and X
icecream.columns

Index(['Temperature', 'Revenue'], dtype='object')

In [7]:
y = icecream['Revenue']
X = icecream[['Temperature']]

In [8]:
# split
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=2529)

In [9]:
# select model
from sklearn.linear_model import LinearRegression
reg = LinearRegression()

In [10]:
# train model
reg.fit(X_train,y_train)

In [11]:
# intercept and slope
reg.intercept_, reg.coef_

(44.01809759024559, array([21.4695184]))

In [12]:
# prediction
y_pred = reg.predict(X_test)

In [13]:
# model evaluation
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score

In [14]:
mean_absolute_error(y_test,y_pred)

18.36955828371228

In [15]:
mean_squared_error(y_test,y_pred)

517.4978493670573

In [16]:
mean_absolute_percentage_error(y_test,y_pred)

0.04155209268159152

In [17]:
r2_score(y_test,y_pred)

0.9841801999698918

## Use Statsmodels Library

In [18]:
import statsmodels.api as sm
X1 = sm.add_constant(X)
print(sm.OLS(y,X).fit().summary())

                                 OLS Regression Results                                
Dep. Variable:                Revenue   R-squared (uncentered):                   0.997
Model:                            OLS   Adj. R-squared (uncentered):              0.997
Method:                 Least Squares   F-statistic:                          1.777e+05
Date:                Mon, 15 Jul 2024   Prob (F-statistic):                        0.00
Time:                        11:25:05   Log-Likelihood:                         -2395.6
No. Observations:                 500   AIC:                                      4793.
Df Residuals:                     499   BIC:                                      4797.
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  
                  coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------