In [None]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Path to the training dataset.
# NOTE(review): this file is named "foods_testing.csv" although it is used as
# the labelled training set — confirm the train/test file names are not swapped.
train_path = '/Users/suvarnadurghadontamsetty/Documents/foods_testing.csv'

# Load at most the first 10,000 rows of the training dataset
train_df = pd.read_csv(train_path, sep=',', quotechar='"', encoding='ISO-8859-1', nrows=10000)

# Remove exact duplicate rows
train_df = train_df.drop_duplicates()

# Drop rows that lack the feature or the label: a missing label cannot be
# learned from and would otherwise be fed to the encoder as if it were a
# regular class value.
train_df = train_df.dropna(subset=['ID', 'score_level'])
if train_df.empty:
    raise ValueError(f'No usable training rows found in {train_path}.')

# Encode the 'score_level' column as integer class ids. The fitted encoder is
# kept so predictions can be mapped back to the original label values.
label_encoder = LabelEncoder()
train_df['score_level'] = label_encoder.fit_transform(train_df['score_level'])

# Select features and labels.
# NOTE(review): 'ID' is the only feature; if it is just a row identifier it
# carries no real signal — include more relevant columns here if available.
features = train_df[['ID']]
labels = train_df['score_level']

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Initialize the RandomForestClassifier model (100 trees, depth capped at 10)
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)

# Fit the model on the training split
model.fit(X_train, y_train)

# Predict on the held-out split
predictions = model.predict(X_test)

# Calculate and print the model accuracy on the held-out split
accuracy = accuracy_score(y_test, predictions)
print(f'Model Accuracy: {accuracy}')

# Path to the testing dataset.
# NOTE(review): this file is named "foods_training.csv" although it is used as
# the unlabelled prediction set — confirm the train/test file names are not swapped.
test_path = '/Users/suvarnadurghadontamsetty/Documents/foods_training.csv'

# Attempt the load directly rather than pre-checking existence and size: the
# pre-check can race with the filesystem and does not cover files that have
# bytes but no parseable columns.
try:
    # NOTE(review): nrows=10000 limits the submission to the first 10,000 rows
    # of the test file — confirm this cap is intended for the final output.
    test_df = pd.read_csv(test_path, sep=',', quotechar='"', encoding='ISO-8859-1', nrows=10000)
except (FileNotFoundError, pd.errors.EmptyDataError):
    print(f"File at {test_path} is either missing or empty.")
else:
    if 'ID' not in test_df.columns:
        # The model was trained on the 'ID' column, so it must be present here
        print('The test file does not contain the required "ID" column.')
    elif test_df.empty:
        # Header-only file: there are no rows to predict on
        print(f"File at {test_path} is either missing or empty.")
    else:
        # Predict with the trained model and map the integer class ids back
        # to the original 'score_level' values
        predicted_labels = label_encoder.inverse_transform(model.predict(test_df[['ID']]))

        # Create a submission DataFrame with 'ID' and predicted 'score_level'
        submission_df = pd.DataFrame({'ID': test_df['ID'], 'score_level': predicted_labels})

        # Save the submission DataFrame to a CSV file in the working directory
        submission_df.to_csv('submission.csv', index=False)
        print('Submission file created successfully.')

