# In [None]:
 # Necessary libraries
import re
from datetime import datetime

import numpy as np
import pandas as pd
from google.colab import files
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Function to handle species logging
def update_species_log(predicted_species):
    """Record one sighting of ``predicted_species`` and download it as a workbook.

    Prompts on stdin for where and when the species was observed, builds a
    one-row detailed log plus a per-species summary, writes both as sheets of
    ``species_log.xlsx`` in the working directory, and hands that file to
    ``google.colab.files.download``.

    The log is built from scratch on every call: it contains only the entry
    collected during this call, not entries from earlier calls.

    Args:
        predicted_species: Label stored in the ``Species`` column of the log.

    Returns:
        None.
    """
    print(f"\nLogging details for {predicted_species}:")

    # Get location (surrounding whitespace carries no information)
    location = input("Where did you find the species? ").strip()

    # Get time with 12-hour format validation.
    # fullmatch anchors both ends, so the pattern needs no explicit ^ / $.
    time_pattern = re.compile(
        r'(1[0-2]|0?[1-9]):([0-5][0-9])\s?([AP]M)', re.IGNORECASE
    )
    while True:
        observation_time = input(
            "What time did you find the species? (e.g., 3:30 PM): "
        ).strip()
        if time_pattern.fullmatch(observation_time):
            break
        print("Invalid format! Please use 'HH:MM AM/PM' format (e.g., 3:30 PM)")

    # Create new entry
    new_entry = {
        'Species': predicted_species,
        'Location': location,
        'Time': observation_time.upper(),  # store AM/PM in a single case
        'Date': datetime.now().strftime('%Y-%m-%d'),
        'Count': 1,
    }

    # Create new log (no upload required)
    updated_log = pd.DataFrame([new_entry])

    # Create summary tally. Named aggregation yields flat, explicitly named
    # columns, so the result does not depend on renaming a MultiIndex by
    # position. Locations are sorted to keep the joined text deterministic.
    tally = updated_log.groupby('Species', as_index=False).agg(**{
        'Total Sightings': ('Count', 'sum'),
        'Locations': ('Location', lambda places: ', '.join(sorted(set(places)))),
        'First Seen': ('Date', 'min'),
        'Last Seen': ('Date', 'max'),
    })

    # Save and download
    with pd.ExcelWriter('species_log.xlsx') as writer:
        updated_log.to_excel(writer, sheet_name='Detailed Log', index=False)
        tally.to_excel(writer, sheet_name='Summary Tally', index=False)

    print("\nDownloading species log...")
    files.download('species_log.xlsx')
    print("File 'species_log.xlsx' has been downloaded!")

# Main program
# Interactive pipeline: upload a labelled spreadsheet, train a random forest
# on it, report hold-out accuracy, ask the user for one observation's feature
# values, predict its species, and log the sighting.
print("===== Invasive Species Identification and Logging System =====")

# Step 1: Upload and load dataset
print("\n[Step 1/6] Upload training data:")
print("Please upload your 'mesa Dataset.xlsx' file")
uploaded = files.upload()
if not uploaded:
    # Without this guard an empty upload surfaces as a bare StopIteration
    # from next(iter(...)) below.
    raise ValueError("No file was uploaded; cannot load the training data.")
file_name = next(iter(uploaded))
df = pd.read_excel(file_name, sheet_name='Sheet1').drop(columns=['Unnamed: 0'], errors='ignore')

# Step 2: Preprocess data
print("\n[Step 2/6] Preprocessing data...")
if 'Species' not in df.columns:
    raise KeyError("The dataset has no 'Species' column to use as the prediction target.")

# Rows without a label cannot be used for supervised training. Drop them
# before imputation so the target never receives the 'Unknown' placeholder
# and the model never learns a fabricated class.
df = df.dropna(subset=['Species']).reset_index(drop=True)

categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
numerical_cols = df.select_dtypes(exclude=['object']).columns.tolist()

# Impute remaining gaps: column median for numbers, a placeholder for text.
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].median())
df[categorical_cols] = df[categorical_cols].fillna('Unknown')

# One encoder per text column, kept so user input can be encoded the same way
# and predictions can be decoded back to species names.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    # Cast to str so a column mixing text and numbers can still be sorted by
    # the encoder and its classes can be joined into the input prompt.
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

X = df.drop(columns=['Species'])
y = df['Species']

# Step 3: Train model
print("\n[Step 3/6] Training model...")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Step 4: Evaluate model
print("\n[Step 4/6] Evaluating model...")
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Report only the classes that occur in the hold-out split or its predictions,
# so target_names always lines up with the labels passed to the report.
unique_labels = sorted(set(y_test) | set(y_pred))
target_names = [label_encoders['Species'].classes_[label] for label in unique_labels]
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=unique_labels, target_names=target_names))

# Step 5: Get user input for prediction
print("\n[Step 5/6] Species Identification")
user_input = {}

for column in X.columns:
    if column in categorical_cols:
        # Only values seen during training can be encoded.
        valid_options = list(label_encoders[column].classes_)
        while True:
            value = input(f"Enter {column} ({'/'.join(valid_options)}): ")
            if value in valid_options:
                break
            print(f"Invalid option! Please choose from {', '.join(valid_options)}")
        user_input[column] = value
    else:
        while True:
            try:
                value = float(input(f"Enter {column} (numerical value): "))
            except ValueError:
                print("Invalid input! Please enter a number.")
                continue
            # float() also parses 'nan' and 'inf', which are not usable
            # feature values.
            if np.isfinite(value):
                break
            print("Invalid input! Please enter a number.")
        user_input[column] = value

# Prepare input sample
sample_df = pd.DataFrame([user_input])
for col in categorical_cols:
    # 'Species' is in categorical_cols but is not a feature, hence the check.
    if col in sample_df.columns:
        sample_df[col] = label_encoders[col].transform(sample_df[col])

# Match the column order used for training.
sample_df = sample_df[X.columns]

# Predict the invasive species
prediction = model.predict(sample_df)
predicted_species = label_encoders['Species'].inverse_transform(prediction)[0]

# Step 6: Log the data
print("\n[Step 6/6] Observation Logging")
print(f"\nPredicted Species: {predicted_species}")
update_species_log(predicted_species)

print("\nProcess completed successfully!")