In [238]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf 
from tensorflow.keras import layers

In [239]:
df = pd.read_csv('avocado.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,0,2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany
1,1,2015-12-20,1.35,54876.98,674.28,44638.81,58.33,9505.56,9408.07,97.49,0.0,conventional,2015,Albany
2,2,2015-12-13,0.93,118220.22,794.7,109149.67,130.5,8145.35,8042.21,103.14,0.0,conventional,2015,Albany
3,3,2015-12-06,1.08,78992.15,1132.0,71976.41,72.58,5811.16,5677.4,133.76,0.0,conventional,2015,Albany
4,4,2015-11-29,1.28,51039.6,941.48,43838.39,75.78,6183.95,5986.26,197.69,0.0,conventional,2015,Albany


In [240]:
df.drop(df.columns[0],axis=1,inplace=True)

In [241]:
df.isnull().sum()

Date            0
AveragePrice    0
Total Volume    0
4046            0
4225            0
4770            0
Total Bags      0
Small Bags      0
Large Bags      0
XLarge Bags     0
type            0
year            0
region          0
dtype: int64

In [242]:
def get_uniques(df,columns):
    return {column: list(df[column].unique()) for column in columns}

In [243]:
category_columns=["region","Date","type"]

In [244]:
get_uniques(df,category_columns)

{'region': ['Albany',
  'Atlanta',
  'BaltimoreWashington',
  'Boise',
  'Boston',
  'BuffaloRochester',
  'California',
  'Charlotte',
  'Chicago',
  'CincinnatiDayton',
  'Columbus',
  'DallasFtWorth',
  'Denver',
  'Detroit',
  'GrandRapids',
  'GreatLakes',
  'HarrisburgScranton',
  'HartfordSpringfield',
  'Houston',
  'Indianapolis',
  'Jacksonville',
  'LasVegas',
  'LosAngeles',
  'Louisville',
  'MiamiFtLauderdale',
  'Midsouth',
  'Nashville',
  'NewOrleansMobile',
  'NewYork',
  'Northeast',
  'NorthernNewEngland',
  'Orlando',
  'Philadelphia',
  'PhoenixTucson',
  'Pittsburgh',
  'Plains',
  'Portland',
  'RaleighGreensboro',
  'RichmondNorfolk',
  'Roanoke',
  'Sacramento',
  'SanDiego',
  'SanFrancisco',
  'Seattle',
  'SouthCarolina',
  'SouthCentral',
  'Southeast',
  'Spokane',
  'StLouis',
  'Syracuse',
  'Tampa',
  'TotalUS',
  'West',
  'WestTexNewMexico'],
 'Date': ['2015-12-27',
  '2015-12-20',
  '2015-12-13',
  '2015-12-06',
  '2015-11-29',
  '2015-11-22',
  '20

In [245]:
date_ordering = sorted(df['Date'].unique())

In [246]:
def ordinal_encode(df, column, ordering):
    df = df.copy()
    df[column] = df[column].apply(lambda x: ordering.index(x))
    return df

In [247]:
def onehot_encode(df,column):
    df=df.copy()
    dummies=pd.get_dummies(df[column])
    df=pd.concat([df,dummies],axis=1)
    df.drop(column,axis=1,inplace=True)
    return df

In [248]:
df=onehot_encode(df,"region")

In [249]:
df.shape

(18249, 66)

In [250]:
target_col="type"
le=LabelEncoder()
df[target_col]=le.fit_transform(df[target_col])

In [251]:
y=df[target_col]
x=df.drop(target_col,axis=1)

In [252]:
df.columns

Index(['Date', 'AveragePrice', 'Total Volume', '4046', '4225', '4770',
       'Total Bags', 'Small Bags', 'Large Bags', 'XLarge Bags', 'type', 'year',
       'Albany', 'Atlanta', 'BaltimoreWashington', 'Boise', 'Boston',
       'BuffaloRochester', 'California', 'Charlotte', 'Chicago',
       'CincinnatiDayton', 'Columbus', 'DallasFtWorth', 'Denver', 'Detroit',
       'GrandRapids', 'GreatLakes', 'HarrisburgScranton',
       'HartfordSpringfield', 'Houston', 'Indianapolis', 'Jacksonville',
       'LasVegas', 'LosAngeles', 'Louisville', 'MiamiFtLauderdale', 'Midsouth',
       'Nashville', 'NewOrleansMobile', 'NewYork', 'Northeast',
       'NorthernNewEngland', 'Orlando', 'Philadelphia', 'PhoenixTucson',
       'Pittsburgh', 'Plains', 'Portland', 'RaleighGreensboro',
       'RichmondNorfolk', 'Roanoke', 'Sacramento', 'SanDiego', 'SanFrancisco',
       'Seattle', 'SouthCarolina', 'SouthCentral', 'Southeast', 'Spokane',
       'StLouis', 'Syracuse', 'Tampa', 'TotalUS', 'West', 'WestTexNewMe

In [254]:
x = x.drop('Date', axis=1)

In [255]:
scaler=StandardScaler()
x=scaler.fit_transform(x)

In [256]:
xtrain,xtest,ytrain,ytest=train_test_split(x,y,test_size=0.2,random_state=42)

In [261]:
input_shape = (65,)

# Input layer
input_layer = layers.Input(shape=input_shape)

# Dense layers
x = layers.Dense(128, activation="relu")(input_layer)
x = layers.Dropout(0.3)(x)
x = layers.Dense(64, activation="relu")(x)

# Output layer
output_layer = layers.Dense(1, activation="sigmoid")(x)

# Define the model
model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

# Compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Print model summary
model.summary()

In [263]:
xtrain = np.random.rand(100, 64)  # Example data
ytrain = np.random.randint(0, 2, size=(100, 1))  # Example labels

# Reshape xtrain to have 65 features
xtrain_resized = np.hstack((xtrain, np.zeros((xtrain.shape[0], 1))))  # Add one more feature for demonstration
input_shape = xtrain_resized.shape[1:]

# Input layer
input_layer = layers.Input(shape=input_shape)

# Dense layers (example architecture)
x = layers.Dense(128, activation="relu")(input_layer)
x = layers.Dropout(0.3)(x)
x = layers.Dense(64, activation="relu")(x)

# Output layer
output_layer = layers.Dense(1, activation="sigmoid")(x)

# Define the model
model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

# Compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Print model summary
model.summary()

# Train the model
history = model.fit(
    xtrain_resized,
    ytrain,
    validation_split=0.2,
    batch_size=32,
    epochs=70,
    callbacks=[tf.keras.callbacks.ReduceLROnPlateau()]
)

Epoch 1/70
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.4297 - loss: 0.7240 - val_accuracy: 0.5000 - val_loss: 0.6676 - learning_rate: 0.0010
Epoch 2/70
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5852 - loss: 0.6834 - val_accuracy: 0.7000 - val_loss: 0.6638 - learning_rate: 0.0010
Epoch 3/70
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6289 - loss: 0.6544 - val_accuracy: 0.5500 - val_loss: 0.6665 - learning_rate: 0.0010
Epoch 4/70
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6445 - loss: 0.6416 - val_accuracy: 0.5500 - val_loss: 0.6668 - learning_rate: 0.0010
Epoch 5/70
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4938 - loss: 0.6726 - val_accuracy: 0.6000 - val_loss: 0.6599 - learning_rate: 0.0010
Epoch 6/70
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accur

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8594 - loss: 0.4498 - val_accuracy: 0.5500 - val_loss: 0.7001 - learning_rate: 1.0000e-06
Epoch 46/70
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8164 - loss: 0.4733 - val_accuracy: 0.5500 - val_loss: 0.7001 - learning_rate: 1.0000e-06
Epoch 47/70
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7703 - loss: 0.5116 - val_accuracy: 0.5500 - val_loss: 0.7001 - learning_rate: 1.0000e-06
Epoch 48/70
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8477 - loss: 0.4755 - val_accuracy: 0.5500 - val_loss: 0.7001 - learning_rate: 1.0000e-06
Epoch 49/70
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8508 - loss: 0.4737 - val_accuracy: 0.5500 - val_loss: 0.7002 - learning_rate: 1.0000e-06
Epoch 50/70
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms

In [265]:
xtest_adjusted = np.hstack((xtest, np.zeros((xtest.shape[0], 1))))  # Add one more feature for demonstration

# Evaluate the model with adjusted xtest
loss, accuracy = model.evaluate(xtest_adjusted, ytest)

print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

[1m115/115[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 261us/step - accuracy: 0.4938 - loss: 0.9267
Test Loss: 0.9297932386398315
Test Accuracy: 0.48958903551101685


# Random Forest Classifier

In [267]:
df = pd.read_csv('avocado.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,0,2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany
1,1,2015-12-20,1.35,54876.98,674.28,44638.81,58.33,9505.56,9408.07,97.49,0.0,conventional,2015,Albany
2,2,2015-12-13,0.93,118220.22,794.7,109149.67,130.5,8145.35,8042.21,103.14,0.0,conventional,2015,Albany
3,3,2015-12-06,1.08,78992.15,1132.0,71976.41,72.58,5811.16,5677.4,133.76,0.0,conventional,2015,Albany
4,4,2015-11-29,1.28,51039.6,941.48,43838.39,75.78,6183.95,5986.26,197.69,0.0,conventional,2015,Albany


In [268]:
X = df[['AveragePrice', 'Total Volume', '4046', '4225', '4770', 'Total Bags', 'Small Bags', 'Large Bags', 'XLarge Bags', 'year', 'type']]
y = df['region']

In [269]:
X = pd.get_dummies(X, columns=['type'])

In [270]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [272]:
from sklearn.ensemble import RandomForestClassifier

In [274]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

In [275]:
rf_classifier.fit(X_train, y_train)

In [276]:
y_pred = rf_classifier.predict(X_test)

In [278]:
from sklearn.metrics import accuracy_score

In [279]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of Random Forest Classifier: {accuracy:.4f}")

Accuracy of Random Forest Classifier: 0.9074


# For regression model

In [280]:
df = pd.read_csv('avocado.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,0,2015-12-27,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany
1,1,2015-12-20,1.35,54876.98,674.28,44638.81,58.33,9505.56,9408.07,97.49,0.0,conventional,2015,Albany
2,2,2015-12-13,0.93,118220.22,794.7,109149.67,130.5,8145.35,8042.21,103.14,0.0,conventional,2015,Albany
3,3,2015-12-06,1.08,78992.15,1132.0,71976.41,72.58,5811.16,5677.4,133.76,0.0,conventional,2015,Albany
4,4,2015-11-29,1.28,51039.6,941.48,43838.39,75.78,6183.95,5986.26,197.69,0.0,conventional,2015,Albany


In [281]:
X = df[['Total Volume', '4046', '4225', '4770', 'Total Bags', 'Small Bags', 'Large Bags', 'XLarge Bags', 'year', 'type', 'region']]
y = df['AveragePrice']


In [282]:
X = pd.get_dummies(X, columns=['type', 'region'])

In [283]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [286]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [287]:
rf_regressor = RandomForestRegressor(n_estimators=100, random_state=42)

In [289]:
rf_regressor.fit(X_train, y_train)

In [292]:
y_pred = rf_regressor.predict(X_test)

In [293]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [294]:
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"R-squared (R2): {r2:.4f}")

Mean Squared Error (MSE): 0.0231
R-squared (R2): 0.8565
