In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import joblib

In [2]:
# Load the raw training data. Later cells read the 'Date', 'ID', 'Store_Type',
# 'Location_Type', 'Region_Code', 'Discount' and 'Sales' columns from this frame.
df = pd.read_csv('TRAIN.csv')

In [3]:
# Parse 'Date' into datetimes and put the rows in chronological order.
df['Date'] = pd.to_datetime(df['Date'])
df = df.sort_values(by='Date')

# Several rows can share one date. Number the rows within each date (0, 1, 2, ...)
# and shift each by that many seconds so every timestamp is distinct.
duplicate_rank = df.groupby('Date').cumcount()
df['Date'] = df['Date'] + pd.to_timedelta(duplicate_rank, unit='s')

# Use the now-unique timestamp as the index and discard the 'ID' column,
# which is not used as a feature.
df = df.set_index('Date').drop(columns=['ID'])


In [4]:
# Label encode categorical variables
categorical_columns = ['Store_Type', 'Location_Type', 'Region_Code', 'Discount']
for column in categorical_columns:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])

# Create new features
df['month'] = df.index.month
df['day_of_week'] = df.index.dayofweek
df['quarter'] = df.index.quarter
df['year'] = df.index.year

In [5]:
# Define features and target
X = df.drop(columns=['Sales'])
y = df['Sales']

# Train-test split
split_date = '2019-01-01'
train_X = X.loc[X.index < split_date]
test_X = X.loc[X.index >= split_date]
train_y = y.loc[y.index < split_date]
test_y = y.loc[y.index >= split_date]

In [6]:
# Scale the features
scaler = MinMaxScaler()
train_X_scaled = scaler.fit_transform(train_X)
test_X_scaled = scaler.transform(test_X)

In [7]:
# Train the model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(train_X_scaled, train_y)


In [8]:
import pickle

# Persist the trained model. The context manager guarantees the file handle is
# closed even if pickling raises part-way through.
with open("rf_model.pkl", "wb") as pickle_out:
    pickle.dump(rf_model, pickle_out)

# Persist the fitted scaler as well: a later cell loads 'scaler.pkl', and the
# prediction endpoint must apply exactly the scaling the model was trained with.
joblib.dump(scaler, 'scaler.pkl')

In [12]:
# Load a saved scaler from 'scaler.pkl' and rebind `scaler` to it, replacing the
# MinMaxScaler object fitted in memory earlier in the notebook.
# The file must already exist on disk (e.g. written with joblib.dump);
# otherwise this raises FileNotFoundError.
scaler = joblib.load('scaler.pkl')

In [9]:
pip install Flask joblib pandas scikit-learn



In [10]:
from flask import Flask, request, jsonify, render_template
import numpy as np
import joblib

# Create the Flask application object on which the route handlers below are registered.
app = Flask(__name__)

In [None]:
@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/predict', methods=['POST'])
def predict():
    """Predict sales for one feature vector posted as form data.

    Reads the ``features`` form field, parses it as comma-separated floats,
    scales the single row with the notebook-level ``scaler`` and passes it to
    the notebook-level ``rf_model``.

    The values are fed to the scaler as-is, so they must be numeric and in the
    same column order the scaler was fitted on; categorical columns must
    already be label-encoded by the caller.

    Returns:
        JSON ``{'prediction': <float>}`` on success, or JSON
        ``{'error': <message>}`` with HTTP status 400 when the field is
        missing or empty, contains a non-numeric or non-finite value, or has
        the wrong number of values.
    """
    data = request.form.get('features', '').strip()
    if not data:
        return jsonify({'error': "Form field 'features' is required."}), 400

    try:
        values = [float(token) for token in data.split(',')]
    except ValueError:
        return jsonify({'error': "'features' must be comma-separated numbers."}), 400

    features = np.array([values])
    if not np.isfinite(features).all():
        return jsonify({'error': "'features' must contain only finite numbers."}), 400

    # Reject a wrong-length vector up front instead of letting the scaler raise
    # and surface as an HTTP 500. Older scikit-learn versions may not expose
    # n_features_in_, in which case the check is skipped.
    expected = getattr(scaler, 'n_features_in_', None)
    if expected is not None and features.shape[1] != expected:
        message = 'Expected {} features, got {}.'.format(expected, features.shape[1])
        return jsonify({'error': message}), 400

    features_scaled = scaler.transform(features)
    prediction = rf_model.predict(features_scaled)
    # Convert the numpy scalar to a plain Python float for JSON serialisation.
    output = float(prediction[0])
    return jsonify({'prediction': output})

if __name__ == '__main__':
    # Debug mode normally turns on the auto-reloader, which restarts the server
    # by re-executing the process; that does not work from inside a notebook
    # kernel, so the reloader is disabled explicitly.
    # Debug mode also exposes the interactive debugger: local development only.
    app.run(debug=True, use_reloader=False)