### Avocado Project by Francis Afuwah.
Batch: DS2312

### Imports

In [1]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import LinearRegression

### Load and Prepare the Dataset:

In [2]:
# Load the data.
# The CSV location can be overridden with the AVOCADO_DATA_PATH environment
# variable so the notebook can run on machines other than the author's.
# When the variable is unset, the original local path is used unchanged.
data_path = os.environ.get(
    'AVOCADO_DATA_PATH',
    'C:/Users/Admin/Desktop/Flip Robo-Intern/Datasets/avocado_full.csv',
)
avocado_data = pd.read_csv(data_path)

In [3]:
# Preprocessing: parse 'Date' into datetimes once, store the parsed column
# back, and derive the calendar features from that same parsed series.
parsed_dates = pd.to_datetime(avocado_data['Date'])
avocado_data['Date'] = parsed_dates
avocado_data['Year'] = parsed_dates.dt.year
avocado_data['Month'] = parsed_dates.dt.month
avocado_data['DayOfWeek'] = parsed_dates.dt.dayofweek

### Encoding Categorical Data

In [4]:
# Encode the categorical columns as integer labels.
# Each column gets its own LabelEncoder: refitting a single encoder on
# 'region' would discard the classes learned for 'type', so encoded 'type'
# values could no longer be mapped back to their original labels.
type_encoder = LabelEncoder()
region_encoder = LabelEncoder()
avocado_data['type_encoded'] = type_encoder.fit_transform(avocado_data['type'])
avocado_data['region_encoded'] = region_encoder.fit_transform(avocado_data['region'])

# Backward compatibility: `encoder` previously ended up fitted on 'region'.
encoder = region_encoder

### Feature and Target Selection

In [5]:
# Choose the model inputs and the two prediction targets.
feature_columns = [
    'Total Volume',
    '4046',
    '4225',
    '4770',
    'Total Bags',
    'Small Bags',
    'Large Bags',
    'XLarge Bags',
    'type_encoded',
    'Year',
    'Month',
    'DayOfWeek',
]
features = avocado_data[feature_columns]

# Regression predicts the price; classification predicts the encoded region.
target_regression = avocado_data['AveragePrice']
target_classification = avocado_data['region_encoded']


### Data Training and Testing

In [6]:
# Split the features and BOTH targets in a single call so the regression and
# classification targets are guaranteed to line up with the same
# X_train / X_test rows (20% held out, fixed seed for reproducibility).
# Two separate calls would overwrite X_train / X_test and stay aligned only
# as long as both calls happen to use the same random_state and input length.
(
    X_train, X_test,
    y_train_reg, y_test_reg,
    y_train_class, y_test_class,
) = train_test_split(
    features,
    target_regression,
    target_classification,
    test_size=0.2,
    random_state=42,
)


### Regression model

In [7]:
# Fit a linear regression on the training rows, then predict the regression
# target for the held-out rows.
reg_model = LinearRegression().fit(X_train, y_train_reg)
y_pred_reg = reg_model.predict(X_test)

In [8]:
# Report the root-mean-squared error of the regression predictions on the
# held-out set (square root of the mean squared error).
regression_mse = mean_squared_error(y_test_reg, y_pred_reg)
print('Regression RMSE:', np.sqrt(regression_mse))

Regression RMSE: 0.3021570108283471


### Classification model

In [9]:
# Classification model: a seeded 100-tree random forest trained on the
# classification target, then used to predict labels for the held-out rows.
class_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_train, y_train_class
)
y_pred_class = class_model.predict(X_test)

In [10]:
# Report the fraction of held-out rows whose predicted label matches the
# true label.
classification_accuracy = accuracy_score(y_test_class, y_pred_class)
print('Classification Accuracy:', classification_accuracy)

Classification Accuracy: 0.8997260273972603


### Calculate the average price of avocados

In [11]:
# Mean of the 'AveragePrice' column over every row in the dataset.
price_column = avocado_data['AveragePrice']
average_price = price_column.mean()

In [12]:
# Display the overall mean price.
average_price_label = "The average price of avocados is:"
print(average_price_label, average_price)

The average price of avocados is: 1.405978409775878


### Find the regions in the dataset

In [13]:
# Collect the distinct values of the 'region' column.
region_column = avocado_data['region']
unique_regions = region_column.unique()

In [14]:
# Show the heading and the distinct regions on separate lines.
print("Unique regions in the dataset:", unique_regions, sep="\n")

Unique regions in the dataset:
['Albany' 'Atlanta' 'BaltimoreWashington' 'Boise' 'Boston'
 'BuffaloRochester' 'California' 'Charlotte' 'Chicago' 'CincinnatiDayton'
 'Columbus' 'DallasFtWorth' 'Denver' 'Detroit' 'GrandRapids' 'GreatLakes'
 'HarrisburgScranton' 'HartfordSpringfield' 'Houston' 'Indianapolis'
 'Jacksonville' 'LasVegas' 'LosAngeles' 'Louisville' 'MiamiFtLauderdale'
 'Midsouth' 'Nashville' 'NewOrleansMobile' 'NewYork' 'Northeast'
 'NorthernNewEngland' 'Orlando' 'Philadelphia' 'PhoenixTucson'
 'Pittsburgh' 'Plains' 'Portland' 'RaleighGreensboro' 'RichmondNorfolk'
 'Roanoke' 'Sacramento' 'SanDiego' 'SanFrancisco' 'Seattle'
 'SouthCarolina' 'SouthCentral' 'Southeast' 'Spokane' 'StLouis' 'Syracuse'
 'Tampa' 'TotalUS' 'West' 'WestTexNewMexico']


### Calculate the average price per region

In [15]:
# Group the rows by region and take the mean 'AveragePrice' within each group.
prices_by_region = avocado_data.groupby('region')['AveragePrice']
average_price_per_region = prices_by_region.mean()

# Show the heading followed by the per-region means.
print("Average price per region:", average_price_per_region, sep="\n")

Average price per region:
region
Albany                 1.561036
Atlanta                1.337959
BaltimoreWashington    1.534231
Boise                  1.348136
Boston                 1.530888
BuffaloRochester       1.516834
California             1.395325
Charlotte              1.606036
Chicago                1.556775
CincinnatiDayton       1.209201
Columbus               1.252781
DallasFtWorth          1.085592
Denver                 1.218580
Detroit                1.276095
GrandRapids            1.505000
GreatLakes             1.338550
HarrisburgScranton     1.513284
HartfordSpringfield    1.818639
Houston                1.047929
Indianapolis           1.313994
Jacksonville           1.510947
LasVegas               1.380917
LosAngeles             1.216006
Louisville             1.286686
MiamiFtLauderdale      1.428491
Midsouth               1.404763
Nashville              1.212101
NewOrleansMobile       1.304793
NewYork                1.727574
Northeast              1.601923
Norther

### Total volume per Region

In [16]:
# Sum 'Total Volume' within each region.
# NOTE(review): the region list includes 'TotalUS' (and possibly other
# aggregate areas), so adding these per-region totals together may double
# count -- confirm before using them for an overall figure.
volume_by_region = avocado_data.groupby('region')['Total Volume']
total_volume_per_region = volume_by_region.sum()

# Show the heading followed by the per-region totals.
print("Total volume per region:", total_volume_per_region, sep="\n")

Total volume per region:
region
Albany                 1.606780e+07
Atlanta                8.860512e+07
BaltimoreWashington    1.347139e+08
Boise                  1.441319e+07
Boston                 9.727398e+07
BuffaloRochester       2.296247e+07
California             1.028982e+09
Charlotte              3.555554e+07
Chicago                1.337023e+08
CincinnatiDayton       4.452201e+07
Columbus               2.999336e+07
DallasFtWorth          2.084193e+08
Denver                 1.389025e+08
Detroit                6.342242e+07
GrandRapids            3.021174e+07
GreatLakes             5.896425e+08
HarrisburgScranton     4.180886e+07
HartfordSpringfield    5.067054e+07
Houston                2.031679e+08
Indianapolis           3.026339e+07
Jacksonville           2.879000e+07
LasVegas               5.437691e+07
LosAngeles             5.078965e+08
Louisville             1.609700e+07
MiamiFtLauderdale      9.767322e+07
Midsouth               5.083494e+08
Nashville              3.561209e