In [3]:
 #Import necessary libraries
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import set_random_seed

In [7]:
import pandas as pd

# Load the raw revenue dataset into `df`.
# Only the read itself is guarded, so an unrelated failure in the success
# message cannot be mistaken for a missing file.
try:
    df = pd.read_csv('data/raw/revenue_prediction.csv')
except FileNotFoundError:
    # Keep the friendly hint, but re-raise instead of calling exit():
    # exit() inside a notebook ends the session rather than reporting the
    # problem, whereas re-raising stops the run at this cell with a traceback.
    print("Error: File not found. Check file path.")
    raise
else:
    print("Data loaded successfully. Shape:", df.shape)


Data loaded successfully. Shape: (100, 8)


In [8]:
# Report the number of null entries found in each column.
missing_per_column = df.isnull().sum()
print("\nMissing values in each column:")
print(missing_per_column)

# Discard every row that contains at least one null
# (df.fillna(value) would be the alternative if rows must be kept).
df = df.dropna(axis=0, how='any')



Missing values in each column:
Id              0
Name            0
Franchise       0
Category        0
City            0
No_Of_Item      0
Order_Placed    0
Revenue         0
dtype: int64


In [10]:
# Preview the first five rows under a heading (heading and table on separate lines).
print("\nFirst 5 rows:", df.head(5), sep="\n")


First 5 rows:
    Id                  Name Franchise       Category       City  No_Of_Item  \
0  101    HungryHowie'sPizza       Yes        Mexican  Bengaluru          55   
1  102  CharleysPhillySteaks        No    Varied Menu   Gurugram          72   
2  103                Chuy's       Yes        Chicken       Pune          25   
3  104           O'Charley's       Yes  Italian/Pizza     Mumbai          18   
4  105         PolloTropical       Yes          Pizza      Noida          48   

   Order_Placed  Revenue  
0           5.5  5953753  
1           6.8  7223131  
2           1.9  2555379  
3           2.5  2175511  
4           4.2  4816715  


In [13]:
# De-duplicate: compare rows across all columns and keep the first occurrence.
df = df.drop_duplicates(subset=None, keep='first')


In [15]:
# Feature engineering: orders placed per menu item.
# Zero item counts are bumped to 1 first so the ratio never divides by zero;
# the adjusted counts are also written back to the No_Of_Item column.
item_counts = df['No_Of_Item'].replace(0, 1)
df['No_Of_Item'] = item_counts
df['Order_Item_Ratio'] = df['Order_Placed'] / item_counts

In [21]:
# Replace each categorical text column with its integer category code.
for column in ('Franchise', 'Category'):
    df[column] = df[column].astype('category').cat.codes

In [22]:
# Trim the revenue tails: keep only rows strictly between the 5th and 95th
# percentiles of Revenue (rows sitting exactly on a bound are dropped too).
revenue = df['Revenue']
q_low, q_high = revenue.quantile(0.05), revenue.quantile(0.95)
within_bounds = (revenue > q_low) & (revenue < q_high)
df = df[within_bounds]

In [14]:
# Model inputs (X) and regression target (y).
features = [
    'Franchise',
    'Category',
    'No_Of_Item',
    'Order_Placed',
    'Order_Item_Ratio',
]
y = df['Revenue']
X = df[features]

In [24]:
# Train-test split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [34]:
# Feature scaling
# The scaler is fitted on the training features only and then reused, unchanged,
# to transform the test features.
feature_scaler = StandardScaler()
X_train_scaled = feature_scaler.fit_transform(X_train)
X_test_scaled = feature_scaler.transform(X_test)

# Create the output folder if needed so the dump also works on a fresh checkout
# where 'models/' does not exist yet.
Path('models').mkdir(parents=True, exist_ok=True)
joblib.dump(feature_scaler, 'models/feature_scaler.pkl')

['models/feature_scaler.pkl']

In [36]:
# Target scaling
# The target is reshaped to a single column because the scaler works on 2-D input.
# As with the features, the scaler is fitted on the training target only and
# reused for the test target.
target_scaler = StandardScaler()
y_train_scaled = target_scaler.fit_transform(y_train.values.reshape(-1, 1))
y_test_scaled = target_scaler.transform(y_test.values.reshape(-1, 1))

# Create the output folder if needed so the dump also works on a fresh checkout
# where 'models/' does not exist yet.
Path('models').mkdir(parents=True, exist_ok=True)
joblib.dump(target_scaler, 'models/target_scaler.pkl')

['models/target_scaler.pkl']

In [28]:
# Build neural network
# Fix the random seeds before any layer is created so weight initialisation
# (and later stochastic steps such as dropout) are repeatable across kernel
# restarts, as far as the backend allows.
set_random_seed(42)

# Fully connected regressor: 128 -> 64 -> 32 -> 1, with batch normalisation and
# dropout after the first two hidden layers and L2 weight decay on the second.
model = Sequential([
    # The input width is declared with an explicit Input object instead of
    # passing input_shape to the first Dense layer, which recent Keras
    # versions warn against.
    Input(shape=(X_train_scaled.shape[1],)),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
    BatchNormalization(),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1)  # single linear output: the (scaled) revenue
])


In [29]:
# Configure training: Adam optimiser, MSE as the loss, MAE reported alongside it.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [30]:
# Early stopping: give up after 10 epochs without improvement on the callback's
# monitored metric (left at its default) and restore the best weights seen.
early_stopping = EarlyStopping(patience=10, restore_best_weights=True)

# Fit on the scaled training data, holding out 20% of it for validation.
history = model.fit(
    X_train_scaled,
    y_train_scaled,
    validation_split=0.2,
    batch_size=32,
    epochs=100,
    callbacks=[early_stopping],
)

Epoch 1/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 540ms/step - loss: 4.4036 - mae: 1.2291 - val_loss: 1.2058 - val_mae: 0.5245
Epoch 2/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step - loss: 3.1042 - mae: 1.0865 - val_loss: 1.1977 - val_mae: 0.5267
Epoch 3/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step - loss: 3.0512 - mae: 1.0224 - val_loss: 1.2003 - val_mae: 0.5341
Epoch 4/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 139ms/step - loss: 2.1728 - mae: 0.8201 - val_loss: 1.2095 - val_mae: 0.5441
Epoch 5/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 185ms/step - loss: 2.2692 - mae: 0.8320 - val_loss: 1.2258 - val_mae: 0.5567
Epoch 6/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step - loss: 1.9899 - mae: 0.7163 - val_loss: 1.2443 - val_mae: 0.5690
Epoch 7/100
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step - loss: 1

In [37]:
# Save model
# Create the output folder if needed so saving also works on a fresh checkout
# where 'models/' does not exist yet.
Path('models').mkdir(parents=True, exist_ok=True)
model.save('models/neural_network_model.keras')