In [3]:
import pandas as pd
import numpy as np

# Synthetic waste-disposal log: one randomly chosen waste type per user per day.

# Seed for the random generator so that re-running the notebook from a fresh
# kernel reproduces exactly the same dataset.
SEED = 42
rng = np.random.default_rng(SEED)

# Define the list of possible waste types
waste_types = ['plastic', 'paper', 'bio', 'other']

# Define the range of dates (both endpoints inclusive, one entry per day)
start_date = '2022-09-01'
end_date = '2022-12-31'
dates = pd.date_range(start=start_date, end=end_date, freq='D')

# Define the user ids
user_ids = [1, 2, 3, 4]

# Draw every waste type in a single call: row i is a date, column j is a user.
waste_draws = rng.choice(waste_types, size=(len(dates), len(user_ids)))

# Build the records date-major / user-minor, i.e. in the same order a nested
# "for date: for user_id:" loop would produce them.
data = [
    {'user_id': user_id, 'waste_type': str(waste_draws[i, j]), 'date': date}
    for i, date in enumerate(dates)
    for j, user_id in enumerate(user_ids)
]

# Convert the data into a data frame
df = pd.DataFrame(data)

In [4]:
# Display the generated frame; a bare last expression is rendered as the cell output.
df

Unnamed: 0,user_id,waste_type,date
0,1,bio,2022-09-01
1,2,other,2022-09-01
2,3,paper,2022-09-01
3,4,other,2022-09-01
4,1,bio,2022-09-02
...,...,...,...
483,4,plastic,2022-12-30
484,1,plastic,2022-12-31
485,2,other,2022-12-31
486,3,paper,2022-12-31


In [5]:
# Persist the generated records. index=False keeps the RangeIndex out of the
# file; otherwise it is written as an unnamed first column that comes back as
# 'Unnamed: 0' when the CSV is read again.
df.to_csv("data2.csv", index=False)

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [8]:
# Load the data
raw = pd.read_csv('data2.csv')

# Preprocess the data: drop incomplete and duplicated rows, keep only the
# columns used below, and parse the date strings. Each step returns a new
# frame, so re-running the cell always starts from the file contents.
events = (
    raw.dropna()
    .drop_duplicates()
    .loc[:, ['user_id', 'waste_type', 'date']]
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Feature engineering: one row per user, one column per waste type, each cell
# holding how many records that user has for that waste type.
events = events.assign(month=events['date'].dt.month)
data = pd.pivot_table(
    events,
    index='user_id',
    columns='waste_type',
    values='month',
    aggfunc='count',
    fill_value=0,
)
data['total'] = data.sum(axis=1)

# Model training and evaluation
# NOTE(review): the pivot has one row per user, and 'total' is the row-sum of
# the feature columns. In the recorded run every user had the same total (122),
# so the classifier only ever sees one class and the reported accuracy of 1.0
# carries no information. Confirm what the intended prediction target is.
X = data.drop(columns=['total'])
y = data['total']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
clf = RandomForestClassifier(random_state=42)  # fixed seed -> reproducible fit
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

# Prediction
# Reuse the feature matrix by name rather than slicing columns by position, so
# the result does not depend on 'total' being the last column. tail(6) returns
# at most six rows (fewer when there are fewer users).
new_data = X.tail(6)
prediction = clf.predict(new_data)
print('Prediction:', prediction)

Accuracy: 1.0
Prediction: [122 122 122 122]


NameError: name 'data' is not defined