In [1]:
!pip install scikit-learn



In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [5]:
# Generate a synthetic daily tourist-footfall dataset related to the Kumbh Mela.
np.random.seed(42)  # fixed seed so the noise draw is reproducible
days = np.arange(1, 101)

# Linear trend (5000 base + 300 per day) plus Gaussian noise (mean 0, std 5000).
noise = np.random.normal(0, 5000, size=days.size)
tourists = 5000 + 300 * days + noise

data = pd.DataFrame({'Days': days, 'Tourists': tourists})
data

Unnamed: 0,Days,Tourists
0,1,7783.570765
1,2,4908.678494
2,3,9138.442691
3,4,13815.149282
4,5,5329.233126
...,...,...
95,96,26482.425259
96,97,35580.601385
97,98,35705.276361
98,99,34725.567283


In [7]:
# Split the data into train and test sets.
# Feature and target are both kept as single-column DataFrames.
X = data.loc[:, ['Days']]
y = data.loc[:, ['Tourists']]

# Hold out 20% of the rows for evaluation; random_state fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Inspect the training features (the 'Days' column of the training split).
X_train

Unnamed: 0,Days
55,56
88,89
26,27
42,43
69,70
...,...
60,61
71,72
14,15
92,93


In [11]:
# Train a simple linear regression model: Tourists as a linear function of Days.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [13]:
# Predict tourist counts for the held-out test days.
y_pred = model.predict(X=X_test)

In [15]:
# Inspect the model's predictions for the test rows.
y_pred

array([[29805.47908654],
       [20622.38763294],
       [25826.13945665],
       [18173.56324532],
       [17867.46019686],
       [16336.9449546 ],
       [11133.19313089],
       [28887.16994118],
       [ 7459.95654945],
       [ 4398.92606492],
       [ 9908.78093708],
       [13582.01751852],
       [26744.44860201],
       [14500.32666388],
       [31948.20042572],
       [ 5623.33825873],
       [27662.75774737],
       [27968.86079582],
       [ 8072.16264636],
       [13888.12056697]])

In [27]:
# Inspect the test features (the 'Days' values the predictions were made for).
X_test

Unnamed: 0,Days
83,84
53,54
70,71
45,46
44,45
39,40
22,23
80,81
10,11
0,1


In [29]:
# Evaluate the predictions against the held-out targets.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)  # mean absolute error
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)   # mean squared error

In [31]:
# Mean absolute error of the predictions on the test set.
mae

3006.561737491561

In [33]:
# Mean squared error of the predictions on the test set.
mse

15551056.057254458

In [None]:
 # Plotting results
# Use an explicit figure/axes pair rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(10, 5))

# Observed data: training points in blue, held-out test points in red.
ax.scatter(X_train, y_train, color='blue', label='Training Data')
ax.scatter(X_test, y_test, color='red', label='Testing Data')

# X_test is in shuffled order after train_test_split. Sort by day before
# drawing so the prediction line is a single left-to-right stroke instead of
# a path that doubles back on itself between arbitrary test points.
test_days = X_test['Days'].to_numpy()
order = np.argsort(test_days)
ax.plot(
    test_days[order],
    np.ravel(y_pred)[order],  # flatten: y_pred may be (n, 1) or (n,)
    color='black',
    linewidth=2,
    label='Prediction',
)

ax.set_xlabel('Days')
ax.set_ylabel('Tourist Footfall')
ax.set_title('Tourist Footfall Prediction using Linear Regression')
ax.legend()
plt.show()