In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics.pairwise import euclidean_distances
from xgboost import XGBClassifier
import pickle
import os
import math
from reportlab.lib.pagesizes import letter
from reportlab.lib import colors
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak, HRFlowable
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import PageTemplate, Frame, NextPageTemplate
from reportlab.platypus.flowables import KeepTogether
from datetime import datetime

# Report color palette, built from hex RGB codes in declaration order.
DARK_GREEN, DARK_BLUE, SOFT_GRAY, BLACK, GRAY = (
    colors.HexColor(hex_code)
    for hex_code in ("#2E7D32", "#1565C0", "#ECEFF1", "#212121", "#757575")
)

# Realistic Ranges: inclusive (min, max) bounds per input feature.
# SubCropRecommender clamps out-of-range inputs to these bounds.
realistic_ranges = dict(
    N=(0, 200),
    P=(0, 200),
    K=(0, 250),
    temperature=(5, 50),
    humidity=(0, 100),
    ph=(3, 11),
    rainfall=(0, 500),
)

# Crop Mapping: main-crop label -> CSV file name holding that crop's sub-crop rows.
# Every file is named "<label>_subcrop_data.csv". Labels are kept verbatim
# (spelling included) because they are looked up with the predicted main-crop label.
_MAIN_CROP_LABELS = (
    'Rice',
    'Maize',
    'Bengal Gram (Gram)(Whole)',
    'Pegeon Pea (Arhar Fali)',
    'Moath Dal',
    'Green Gram (Moong)(Whole)',
    'Black Gram Dal (Urd Dal)',
    'Lentil (Masur)(Whole)',
    'Pomegranate',
    'Banana',
    'Mango',
    'Grapes',
    'Water Melon',
    'Karbuja (Musk Melon)',
    'Apple',
    'Orange',
    'Papaya',
    'Coconut',
    'Cotton',
    'Jute',
    'Coffee',
)
crop_name_mapping = {label: f"{label}_subcrop_data.csv" for label in _MAIN_CROP_LABELS}

# SubCropRecommender Class
class SubCropRecommender:
    """Two-stage recommender: predict a main crop, then rank its sub-crops.

    The main crop is predicted by a pickled bundle (a dict with the keys
    ``'model'``, ``'scaler'`` and ``'label_encoder'``). Sub-crops are then
    ranked by Euclidean distance between the clamped, unscaled inputs and the
    rows of the CSV file mapped to that main crop.
    """

    # Feature columns, in the order used for the main model and the distance ranking.
    _FEATURE_COLUMNS = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']

    def __init__(self, main_model_path='main_crop_model.pkl', subcrop_dir='C:/Projects/Creative & Innovative Project/datasets/sub_crop_data/'):
        """Load the main-crop bundle and remember where the sub-crop CSVs live.

        Args:
            main_model_path: Path of the pickled main-crop bundle.
            subcrop_dir: Directory containing the per-crop sub-crop CSV files.
        """
        self.main_model = self.load_main_crop_model(main_model_path)
        self.subcrop_dir = subcrop_dir
        self.crop_name_mapping = crop_name_mapping
        self.realistic_ranges = realistic_ranges

    def load_main_crop_model(self, path):
        """Unpickle the main-crop bundle at *path*.

        Returns:
            The unpickled object, or ``None`` (after printing the error) when
            the file cannot be opened or unpickled.
        """
        try:
            with open(path, 'rb') as file:
                # SECURITY: pickle.load executes arbitrary code embedded in the
                # file; only load model files from a trusted source.
                return pickle.load(file)
        except Exception as e:
            print(f"Error loading main crop model: {str(e)}")
            return None

    def validate_and_preprocess_input(self, N, P, K, temperature, humidity, ph, rainfall):
        """Convert the inputs to floats and clamp them to ``realistic_ranges``.

        Returns:
            ``(True, capped_inputs, warnings_list)`` on success, where
            ``capped_inputs`` maps feature name to the clamped float and
            ``warnings_list`` has one message per clamped value; or
            ``(False, error_message, [])`` when a value is not numeric or is NaN.
        """
        inputs = {'N': N, 'P': P, 'K': K, 'temperature': temperature,
                  'humidity': humidity, 'ph': ph, 'rainfall': rainfall}
        for param, val in inputs.items():
            try:
                inputs[param] = float(val)
            except (ValueError, TypeError):
                return False, f"Invalid input: {param} must be a number", []
            # NaN compares False against both bounds, so it would otherwise
            # slip through the range check below unclamped.
            if math.isnan(inputs[param]):
                return False, f"Invalid input: {param} must be a number", []
        capped_inputs = {}
        warnings_list = []
        for param, val in inputs.items():
            min_val, max_val = self.realistic_ranges[param]
            if val < min_val or val > max_val:
                warnings_list.append(f"{param} ({val}) outside realistic range ({min_val}-{max_val}), capped")
                capped_inputs[param] = max(min_val, min(val, max_val))
            else:
                capped_inputs[param] = val
        return True, capped_inputs, warnings_list

    def recommend_sub_crops(self, N, P, K, temperature, humidity, ph, rainfall, num_recommendations=3):
        """Predict the main crop and return its closest sub-crop rows.

        Args:
            N, P, K, temperature, humidity, ph, rainfall: Input features;
                validated and clamped by ``validate_and_preprocess_input``.
            num_recommendations: Number of nearest sub-crop rows to return.

        Returns:
            On success, a dict with ``main_crop``, ``main_confidence`` (the
            highest predicted class probability), ``sub_crops`` (a list of
            ``{"sub_crop", "distance", "features"}`` dicts, nearest first) and
            ``warnings`` (``None`` when nothing was clamped). On failure, a
            dict that carries an ``"error"`` message and an empty
            ``sub_crops`` list. Exceptions are reported through ``"error"``
            rather than raised.
        """
        try:
            if self.main_model is None:
                return {"error": "Main crop model not loaded", "main_crop": None, "sub_crops": [], "warnings": None}

            is_valid, capped_inputs, input_warnings = self.validate_and_preprocess_input(
                N, P, K, temperature, humidity, ph, rainfall
            )
            if not is_valid:
                # On failure the second tuple element is the error message.
                return {"error": capped_inputs, "main_crop": None, "sub_crops": [], "warnings": input_warnings}

            feature_row = [capped_inputs[col] for col in self._FEATURE_COLUMNS]
            input_df = pd.DataFrame([feature_row], columns=self._FEATURE_COLUMNS)
            input_scaled = self.main_model['scaler'].transform(input_df)
            main_crop_encoded = self.main_model['model'].predict(input_scaled)[0]
            main_crop = self.main_model['label_encoder'].inverse_transform([main_crop_encoded])[0]
            main_confidence = float(max(self.main_model['model'].predict_proba(input_scaled)[0]))

            if main_crop not in self.crop_name_mapping:
                return {"error": f"No sub-crop mapping for {main_crop}", "main_crop": main_crop,
                        "sub_crops": [], "warnings": input_warnings}

            subcrop_filename = self.crop_name_mapping[main_crop]
            subcrop_file = os.path.join(self.subcrop_dir, subcrop_filename)

            if not os.path.exists(subcrop_file):
                return {"error": f"Sub-crop file {subcrop_filename} not found",
                        "main_crop": main_crop, "sub_crops": [], "warnings": input_warnings}

            sub_crop_df = pd.read_csv(subcrop_file)
            required_cols = ['sub-crop'] + self._FEATURE_COLUMNS
            missing_cols = [col for col in required_cols if col not in sub_crop_df.columns]
            if missing_cols:
                return {"error": f"Missing columns: {missing_cols}", "main_crop": main_crop,
                        "sub_crops": [], "warnings": input_warnings}

            # Distances use the raw (unscaled) feature values on both sides.
            input_vector = np.array([feature_row])
            sub_crop_features = sub_crop_df[self._FEATURE_COLUMNS].values
            sub_crop_names = sub_crop_df['sub-crop'].values

            distances = euclidean_distances(input_vector, sub_crop_features)[0]
            sub_crops_with_distances = list(zip(sub_crop_names, distances, sub_crop_features))
            # Rows are ranked individually, so the same sub-crop name can
            # appear more than once in the result.
            sorted_sub_crops = sorted(sub_crops_with_distances, key=lambda x: x[1])[:num_recommendations]
            recommended_sub_crops = [{"sub_crop": crop, "distance": float(dist), "features": features}
                                     for crop, dist, features in sorted_sub_crops]

            return {
                "main_crop": main_crop,
                "main_confidence": main_confidence,
                "sub_crops": recommended_sub_crops,
                "warnings": input_warnings if input_warnings else None
            }
        except Exception as e:
            return {"error": str(e), "main_crop": None, "sub_crops": [], "warnings": None}

    def calculate_subcrop_accuracy(self, num_recommendations=3):
        """Replay every row of every sub-crop CSV through ``recommend_sub_crops``.

        Each row's own ``sub-crop`` value is the expected answer. Datasets
        with a missing file, missing columns, fewer than 30 rows or fewer than
        3 distinct sub-crops are skipped. Progress and mismatches are written
        to ``subcrop_accuracy_debug.txt`` in the current working directory.

        Note that the candidates come from the file of the *predicted* main
        crop; when that is the dataset being replayed, the query row itself is
        among the candidates.

        Args:
            num_recommendations: Size of the recommendation list requested per
                row; also the denominator of the per-row precision.

        Returns:
            ``(metrics, evaluated_datasets, skipped_datasets)`` where
            ``metrics`` maps metric name to value and the two lists hold
            human-readable descriptions.
        """
        total_tests = 0
        correct_matches = 0
        precision_sum = 0
        reciprocal_rank_sum = 0
        dcg_sum = 0
        distances_correct = []
        diversity_sum = 0
        all_recommended_subcrops = set()
        total_unique_subcrops = set()
        skipped_datasets = []
        evaluated_datasets = []
        required_cols = ['sub-crop'] + self._FEATURE_COLUMNS

        # Explicit UTF-8 so sub-crop names outside the platform's default
        # encoding cannot abort the evaluation with UnicodeEncodeError.
        with open('subcrop_accuracy_debug.txt', 'w', encoding='utf-8') as debug_file:
            for main_crop, filename in self.crop_name_mapping.items():
                file_path = os.path.join(self.subcrop_dir, filename)
                if not os.path.exists(file_path):
                    skipped_datasets.append(f"{main_crop}: File {filename} not found")
                    debug_file.write(f"Skipping {main_crop}: {filename} not found\n")
                    continue

                try:
                    sub_crop_df = pd.read_csv(file_path)
                    if not all(col in sub_crop_df.columns for col in required_cols):
                        skipped_datasets.append(f"{main_crop}: Missing required columns")
                        debug_file.write(f"Skipping {main_crop}: {filename} missing required columns\n")
                        continue

                    num_samples = len(sub_crop_df)
                    num_unique_subcrops = len(sub_crop_df['sub-crop'].unique())
                    if num_samples < 30 or num_unique_subcrops < 3:
                        skipped_datasets.append(f"{main_crop}: Insufficient samples ({num_samples}) or unique sub-crops ({num_unique_subcrops})")
                        debug_file.write(f"Skipping {main_crop}: Insufficient samples ({num_samples}) or unique sub-crops ({num_unique_subcrops})\n")
                        continue

                    evaluated_datasets.append(f"{main_crop}: {num_samples} samples, {num_unique_subcrops} sub-crops")
                    total_unique_subcrops.update(sub_crop_df['sub-crop'].unique())

                    for _, row in sub_crop_df.iterrows():
                        expected_sub_crop = row['sub-crop']
                        test_input = [row[col] for col in self._FEATURE_COLUMNS]

                        result = self.recommend_sub_crops(*test_input, num_recommendations=num_recommendations)

                        if "error" in result:
                            # Recorded once per failing row, so entries may repeat.
                            skipped_datasets.append(f"{main_crop}: Recommendation error - {result['error']}")
                            debug_file.write(f"Error for {main_crop}: {result['error']}\n")
                            continue

                        predicted_sub_crops = [item['sub_crop'] for item in result['sub_crops']]
                        predicted_distances = [item['distance'] for item in result['sub_crops']]
                        predicted_features = [item['features'] for item in result['sub_crops']]
                        all_recommended_subcrops.update(predicted_sub_crops)
                        total_tests += 1

                        # Top-3 Accuracy, Recall@3, Hit Rate@3 (all share this counter)
                        if expected_sub_crop in predicted_sub_crops:
                            correct_matches += 1
                            rank = predicted_sub_crops.index(expected_sub_crop)
                            distances_correct.append(predicted_distances[rank])

                        # Precision@3: every list slot equal to the expected name counts.
                        correct_in_top3 = sum(1 for pred in predicted_sub_crops if pred == expected_sub_crop)
                        precision_sum += correct_in_top3 / num_recommendations

                        # MRR: 1-based rank of the first match, 0 when absent.
                        rank = next((i + 1 for i, pred in enumerate(predicted_sub_crops) if pred == expected_sub_crop), 0)
                        reciprocal_rank_sum += (1 / rank) if rank > 0 else 0

                        # NDCG@3: the ideal DCG is a single hit at rank 1, i.e. 1.0.
                        dcg = sum((1 / math.log2(i + 2)) if pred == expected_sub_crop else 0
                                  for i, pred in enumerate(predicted_sub_crops))
                        idcg = 1 / math.log2(2)
                        dcg_sum += dcg / idcg if idcg > 0 else 0

                        # Diversity: mean pairwise Euclidean distance between recommended rows.
                        if len(predicted_features) >= 2:
                            pairwise_distances = []
                            for i in range(len(predicted_features)):
                                for j in range(i + 1, len(predicted_features)):
                                    dist = np.sqrt(np.sum((predicted_features[i] - predicted_features[j]) ** 2))
                                    pairwise_distances.append(dist)
                            diversity_sum += np.mean(pairwise_distances) if pairwise_distances else 0

                        if expected_sub_crop not in predicted_sub_crops:
                            debug_file.write(f"Mismatch for {main_crop}: Expected {expected_sub_crop}, Got {predicted_sub_crops}\n")
                except Exception as e:
                    # Any failure abandons the rest of this dataset only.
                    skipped_datasets.append(f"{main_crop}: Data loading error - {str(e)}")
                    debug_file.write(f"Error loading {main_crop}: {str(e)}\n")

            accuracy = (correct_matches / total_tests) * 100 if total_tests > 0 else 0.0
            precision_at_3 = (precision_sum / total_tests) * 100 if total_tests > 0 else 0.0
            recall_at_3 = (correct_matches / total_tests) * 100 if total_tests > 0 else 0.0
            f1_score_at_3 = (2 * precision_at_3 * recall_at_3 / (precision_at_3 + recall_at_3)) if (precision_at_3 + recall_at_3) > 0 else 0.0
            mrr = (reciprocal_rank_sum / total_tests) if total_tests > 0 else 0.0
            ndcg_at_3 = (dcg_sum / total_tests) if total_tests > 0 else 0.0
            hit_rate_at_3 = (correct_matches / total_tests) * 100 if total_tests > 0 else 0.0
            # Infinity signals that no row was ever matched.
            avg_distance = sum(distances_correct) / len(distances_correct) if distances_correct else float('inf')
            diversity = (diversity_sum / total_tests) if total_tests > 0 else 0.0
            coverage = len(all_recommended_subcrops) / len(total_unique_subcrops) * 100 if total_unique_subcrops else 0.0

            metrics = {
                'Top-3 Accuracy (%)': accuracy,
                'Precision@3 (%)': precision_at_3,
                'Recall@3 (%)': recall_at_3,
                'F1-Score@3 (%)': f1_score_at_3,
                'Mean Reciprocal Rank': mrr,
                'NDCG@3': ndcg_at_3,
                'Hit Rate@3 (%)': hit_rate_at_3,
                'Average Euclidean Distance': avg_distance,
                'Diversity': diversity,
                'Coverage (%)': coverage
            }
            metrics_message = "\n".join(f"{key}: {value:.2f}" for key, value in metrics.items())
            debug_file.write(f"\n{metrics_message}\n")

        return metrics, evaluated_datasets, skipped_datasets

# Main Crop Model Training and Evaluation
def train_and_save_main_crop_model(data_path='../datasets/Crop_recommendation.csv', model_path='main_crop_model.pkl'):
    """Train the main-crop Random Forest and pickle it with its preprocessing.

    The CSV must provide the seven feature columns and a ``label`` column.
    Labels are integer-encoded, the data is split 80/20 with a fixed seed,
    and the scaler is fitted on the training part only.

    Args:
        data_path: CSV file with the training data. Defaults to the
            location the function previously had hard-coded.
        model_path: Destination of the pickled
            ``{'model', 'label_encoder', 'scaler'}`` bundle.

    Returns:
        ``(model, label_encoder, scaler, X_train_scaled, X_test_scaled,
        y_train, y_test)``; or a tuple of seven ``None`` values after
        printing the error when any step fails.
    """
    try:
        data = pd.read_csv(data_path)
        X = data[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
        y = data['label']

        label_encoder = LabelEncoder()
        y_encoded = label_encoder.fit_transform(y)

        X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

        # Fit the scaler on the training split only so test data does not
        # influence the scaling statistics.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_train_scaled, y_train)

        with open(model_path, 'wb') as file:
            pickle.dump({'model': model, 'label_encoder': label_encoder, 'scaler': scaler}, file)

        return model, label_encoder, scaler, X_train_scaled, X_test_scaled, y_train, y_test
    except Exception as e:
        print(f"Error training main crop model: {str(e)}")
        return None, None, None, None, None, None, None

def evaluate_main_crop_model(model, X_test, y_test, label_encoder):
    """Score a fitted main-crop classifier on a held-out split.

    Args:
        model: Fitted classifier exposing ``predict`` and ``predict_proba``.
        X_test: Feature matrix to score.
        y_test: Encoded target labels matching ``X_test``.
        label_encoder: Encoder used to turn encoded labels back into names.

    Returns:
        Dict of metric name to value. If any step raises, the error is
        printed and every metric is reported as ``0.0``.
    """
    metric_names = (
        'Accuracy (%)',
        'Precision (Macro) (%)',
        'Recall (Macro) (%)',
        'F1-Score (Macro) (%)',
        'Average Confidence',
        'Low Confidence Rate (%)',
        'CV Accuracy (%)',
    )
    try:
        predicted = model.predict(X_test)
        predicted_names = label_encoder.inverse_transform(predicted)
        actual_names = label_encoder.inverse_transform(y_test)
        # Confidence of a prediction = its highest class probability.
        confidences = np.max(model.predict_proba(X_test), axis=1)
        # NOTE(review): cross-validation runs on the test split, not on the
        # training data - confirm this is intended.
        cv_scores = cross_val_score(model, X_test, y_test, cv=5, scoring='accuracy')

        macro_options = {'average': 'macro', 'zero_division': 0}
        report = {}
        report[metric_names[0]] = accuracy_score(y_test, predicted) * 100
        report[metric_names[1]] = precision_score(actual_names, predicted_names, **macro_options) * 100
        report[metric_names[2]] = recall_score(actual_names, predicted_names, **macro_options) * 100
        report[metric_names[3]] = f1_score(actual_names, predicted_names, **macro_options) * 100
        report[metric_names[4]] = np.mean(confidences)
        report[metric_names[5]] = (np.sum(confidences < 0.7) / len(confidences)) * 100
        report[metric_names[6]] = cv_scores.mean() * 100
        return report
    except Exception as e:
        print(f"Error evaluating main crop model: {e}")
        return dict.fromkeys(metric_names, 0.0)

# Model Comparison
def compare_models():
    """Benchmark candidate classifiers for the main-crop and sub-crop tasks.

    Main crop: each candidate is fitted on an 80/20 split of
    ``../datasets/Crop_recommendation.csv`` and scored by test accuracy; the
    pickled ``main_crop_model.pkl`` is scored on the same test split.

    Sub-crop: ``SubCropRecommender.calculate_subcrop_accuracy`` supplies the
    Euclidean-distance score. KNN and Random Forest are fitted per sub-crop
    CSV on a 90/10 split and scored by whether the expected sub-crop is among
    at most three candidate labels.

    Returns:
        ``(main_crop_results, subcrop_results, skipped_datasets)``. The two
        result lists hold ``(name, accuracy_percent)`` tuples, with ``0.0``
        recorded for a model that raised. ``skipped_datasets`` is
        de-duplicated through a set, so its order is not preserved.

    Raises:
        Whatever ``pd.read_csv`` / the initial split raise; only the
        per-model sections are guarded.
    """
    # Main Crop Comparison
    data = pd.read_csv('../datasets/Crop_recommendation.csv')
    X = data[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
    y = data['label']
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    main_crop_models = {
        "Logistic Regression": LogisticRegression(max_iter=2000),
        "KNN (k=5)": KNeighborsClassifier(n_neighbors=5),
        "SVM (Linear)": SVC(kernel='linear', probability=True),
        "SVM (RBF)": SVC(kernel='rbf', probability=True),
        "Decision Tree": DecisionTreeClassifier(),
        "Random Forest": RandomForestClassifier(n_estimators=100),
        "Naive Bayes": GaussianNB(),
        "XGBoost": XGBClassifier(eval_metric='mlogloss')
    }

    main_crop_results = []
    for name, model in main_crop_models.items():
        try:
            model.fit(X_train_scaled, y_train)
            preds = model.predict(X_test_scaled)
            acc = accuracy_score(y_test, preds) * 100
            main_crop_results.append((name, acc))
        except Exception as e:
            main_crop_results.append((name, 0.0))
            print(f"Error evaluating main crop {name}: {str(e)}")

    # Add Main Crop Model
    try:
        with open('main_crop_model.pkl', 'rb') as file:
            # SECURITY: pickle.load executes arbitrary code embedded in the
            # file; only load model files from a trusted source.
            model_data = pickle.load(file)
            preds = model_data['model'].predict(X_test_scaled)
            acc = accuracy_score(y_test, preds) * 100
            main_crop_results.append(("Main Crop Model (Random Forest)", acc))
    except Exception as e:
        main_crop_results.append(("Main Crop Model (Random Forest)", 0.0))
        print(f"Error evaluating Main Crop Model: {str(e)}")

    # Sub-Crop Comparison
    subcrop_dir = 'C:/Projects/Creative & Innovative Project/datasets/sub_crop_data/'
    subcrop_models = {
        "Euclidean Distance (SubCropRecommender)": None,
        "KNN (k=5)": KNeighborsClassifier(n_neighbors=5),
        "Random Forest": RandomForestClassifier(n_estimators=100)
    }

    subcrop_results = []
    skipped_datasets = []
    for name, model in subcrop_models.items():
        if name == "Euclidean Distance (SubCropRecommender)":
            try:
                recommender = SubCropRecommender(main_model_path='main_crop_model.pkl', subcrop_dir=subcrop_dir)
                metrics, _, skipped = recommender.calculate_subcrop_accuracy()
                subcrop_results.append((name, metrics['Top-3 Accuracy (%)']))
                skipped_datasets.extend(skipped)
            except Exception as e:
                subcrop_results.append((name, 0.0))
                skipped_datasets.append(f"SubCropRecommender: {str(e)}")
                print(f"Error calculating sub-crop accuracy: {str(e)}")
        else:
            try:
                total_tests = 0
                correct_matches = 0
                for main_crop, filename in crop_name_mapping.items():
                    file_path = os.path.join(subcrop_dir, filename)
                    if not os.path.exists(file_path):
                        skipped_datasets.append(f"{main_crop}: File {filename} not found")
                        continue
                    sub_crop_df = pd.read_csv(file_path)
                    required_cols = ['sub-crop', 'N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
                    if not all(col in sub_crop_df.columns for col in required_cols):
                        skipped_datasets.append(f"{main_crop}: Missing required columns")
                        continue
                    if len(sub_crop_df) < 30 or len(sub_crop_df['sub-crop'].unique()) < 3:
                        skipped_datasets.append(f"{main_crop}: Insufficient samples ({len(sub_crop_df)}) or sub-crops ({len(sub_crop_df['sub-crop'].unique())})")
                        continue
                    X = sub_crop_df[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']]
                    y = sub_crop_df['sub-crop']
                    le = LabelEncoder()
                    y_encoded = le.fit_transform(y)
                    X_train_sub, X_test_sub, y_train_sub, y_test_sub = train_test_split(X, y_encoded, test_size=0.1, random_state=42)
                    if len(X_test_sub) < 3 or len(X_train_sub) < 4:
                        skipped_datasets.append(f"{main_crop}: Insufficient test ({len(X_test_sub)}) or train ({len(X_train_sub)}) samples")
                        continue
                    # The same estimator instance is re-fitted for every dataset.
                    model.fit(X_train_sub, y_train_sub)
                    for i in range(len(X_test_sub)):
                        input_vector = X_test_sub.iloc[i].values.reshape(1, -1)
                        expected = le.inverse_transform([y_test_sub[i]])[0]
                        if isinstance(model, KNeighborsClassifier):
                            n_neighbors = min(3, len(X_train_sub) - 1)
                            distances, indices = model.kneighbors(input_vector, n_neighbors=n_neighbors)
                            valid_indices = indices[0][indices[0] < len(X_train_sub)]
                            if len(valid_indices) == 0:
                                continue
                            # NOTE(review): candidates are the model's *predictions*
                            # for the nearest training rows, not those rows' stored
                            # labels - confirm this is intended.
                            predicted = le.inverse_transform(model.predict(X_train_sub.iloc[valid_indices]))
                        else:
                            probs = model.predict_proba(input_vector)[0]
                            valid_classes = np.arange(len(probs))[probs > 0]
                            if len(valid_classes) < 1:
                                continue
                            top_indices = np.argsort(probs[valid_classes])[-min(3, len(valid_classes)):][::-1]
                            # predict_proba columns follow model.classes_, which omits any
                            # label absent from the training split; map column positions
                            # to encoded labels before decoding them.
                            predicted = le.inverse_transform(model.classes_[valid_classes[top_indices]])
                        total_tests += 1
                        if expected in predicted:
                            correct_matches += 1
                acc = (correct_matches / total_tests) * 100 if total_tests > 0 else 0.0
                subcrop_results.append((name, acc))
            except Exception as e:
                subcrop_results.append((name, 0.0))
                skipped_datasets.append(f"{name}: {str(e)}")
                print(f"Error evaluating sub-crop {name}: {str(e)}")

    return main_crop_results, subcrop_results, list(set(skipped_datasets))

# PDF Generation
def header_footer(canvas, doc):
    """Draw the running header and footer on the current page.

    The header carries the report title on the left, the page number on the
    right and a thin rule below them; the footer repeats the page number,
    centred. Canvas state is saved before drawing and restored afterwards.

    Args:
        canvas: Drawing canvas for the page.
        doc: Document object providing ``pagesize`` and ``page``.
    """
    canvas.saveState()

    page_width = doc.pagesize[0]
    page_height = doc.pagesize[1]
    side_margin = 0.75 * inch
    right_edge = page_width - side_margin
    header_baseline = page_height - side_margin
    rule_y = page_height - 0.85 * inch

    # Header: title, page number, separator rule
    canvas.setFont('Helvetica-Bold', 10)
    canvas.setFillColor(DARK_GREEN)
    canvas.drawString(side_margin, header_baseline, "Crop Combination Recommendation and Price Prediction")
    canvas.setFont('Helvetica', 8)
    canvas.setFillColor(GRAY)
    canvas.drawRightString(right_edge, header_baseline, f"Page {doc.page}")
    canvas.setLineWidth(0.5)
    canvas.setStrokeColor(GRAY)
    canvas.line(side_margin, rule_y, right_edge, rule_y)

    # Footer: centred page number
    canvas.setFont('Helvetica', 8)
    canvas.setFillColor(GRAY)
    canvas.drawCentredString(page_width / 2, 0.5 * inch, f"Page {doc.page}")

    canvas.restoreState()

def format_table_cell(text, is_header=False):
    """Wrap *text* in a centred 9pt Paragraph for use as a table cell.

    Args:
        text: Cell content; converted with ``str()``.
        is_header: When truthy, use the header look (bold Helvetica, white
            text); otherwise the body look (Times-Roman, BLACK text).

    Returns:
        A ``Paragraph`` carrying the chosen style.
    """
    if is_header:
        style_name, font_name, text_color = 'TableHeader', 'Helvetica-Bold', colors.white
    else:
        style_name, font_name, text_color = 'TableCell', 'Times-Roman', BLACK

    cell_style = ParagraphStyle(
        name=style_name,
        fontName=font_name,
        fontSize=9,
        textColor=text_color,
        alignment=1,
        leading=10,
        wordWrap='CJK'
    )
    return Paragraph(str(text), cell_style)

def _header_row(labels):
    """Return one table row of header-styled cells, one per label."""
    return [format_table_cell(label, is_header=True) for label in labels]


def _body_row(values):
    """Return one table row of body-styled cells, one per value."""
    return [format_table_cell(value) for value in values]


def _styled_table(rows, col_widths, *, body_padding=4, striped=True, fixed_row_height=True):
    """Build a report table with the shared green-header styling.

    Stripe commands are generated from the actual number of rows, so the
    style never refers to a row that does not exist and every second body row
    is shaded regardless of table length.

    Args:
        rows: List of rows, the first being the header; cells are flowables.
        col_widths: Column widths in points.
        body_padding: Top/bottom padding applied to the body rows.
        striped: Shade every second body row with ``SOFT_GRAY``.
        fixed_row_height: Use 0.3-inch rows; when false, ReportLab sizes each
            row to its content (needed for long, wrapping text).
    """
    row_count = len(rows)
    row_heights = [0.3 * inch] * row_count if fixed_row_height else None

    commands = [
        ('BACKGROUND', (0, 0), (-1, 0), DARK_GREEN),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.white),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('BOTTOMPADDING', (0, 0), (-1, 0), 6),
        ('TOPPADDING', (0, 0), (-1, 0), 6),
    ]
    if row_count > 1:
        commands += [
            ('BOTTOMPADDING', (0, 1), (-1, -1), body_padding),
            ('TOPPADDING', (0, 1), (-1, -1), body_padding),
            ('BACKGROUND', (0, 1), (-1, -1), colors.white),
        ]
        if striped:
            # Later BACKGROUND commands override the white base set above.
            commands += [
                ('BACKGROUND', (0, row), (-1, row), SOFT_GRAY)
                for row in range(2, row_count, 2)
            ]
    commands += [
        ('GRID', (0, 0), (-1, -1), 0.5, BLACK),
        ('LEFTPADDING', (0, 0), (-1, -1), 8),
        ('RIGHTPADDING', (0, 0), (-1, -1), 8),
    ]

    table = Table(rows, colWidths=col_widths, rowHeights=row_heights)
    table.setStyle(TableStyle(commands))
    return table


def _parse_evaluated_entry(entry):
    """Split ``'<name>: <n> samples, <m> ...'`` into (name, samples, sub_crops).

    Entries that do not follow that shape are shown verbatim in the name
    column with ``"N/A"`` counts instead of raising.
    """
    text = str(entry)
    name, colon, details = text.rpartition(": ")
    samples, marker, remainder = details.partition(" samples, ")
    if not colon or not marker:
        return text, "N/A", "N/A"
    return name, samples, remainder.split(" ")[0]


def _summarise_subcrop_file(file_path):
    """Return (samples, unique sub-crops, status) for one sub-crop CSV file.

    A missing file is reported as ``"Skipped"``; a file that cannot be read
    is reported as ``"Error"`` (and the reason printed).  A readable file is
    ``"Evaluated"`` only with at least 30 rows and 3 distinct sub-crops.
    """
    if not os.path.exists(file_path):
        return "N/A", "N/A", "Skipped"
    try:
        df = pd.read_csv(file_path)
        samples = len(df)
        sub_crops = len(df['sub-crop'].unique()) if 'sub-crop' in df.columns else 0
    except Exception as exc:
        print(f"Error reading sub-crop dataset {file_path}: {exc}")
        return "N/A", "N/A", "Error"
    status = "Evaluated" if samples >= 30 and sub_crops >= 3 else "Skipped"
    return samples, sub_crops, status


def generate_pdf_report(main_crop_comp, subcrop_comp, main_crop_metrics, subcrop_metrics, evaluated_datasets, skipped_datasets, output_filename="Crop_Recommendation_Report.pdf", subcrop_dir='C:/Projects/Creative & Innovative Project/datasets/sub_crop_data/'):
    """Render the model-evaluation report as a PDF file.

    The document has a cover page followed by narrative sections and tables
    (model comparisons, metric tables, dataset summary, appendix).  Pages
    after the cover carry the running header/footer drawn by
    ``header_footer``; it is passed to ``build`` as ``onLaterPages``, the
    hook ``SimpleDocTemplate`` provides for per-page decoration.

    Args:
        main_crop_comp: Iterable of ``(model name, accuracy %)`` pairs.
        subcrop_comp: Iterable of ``(model name, top-3 accuracy %)`` pairs.
        main_crop_metrics: Mapping of metric name to value; numbers are shown
            with two decimals, anything else via ``str()``.
        subcrop_metrics: Mapping of metric name to value; non-numeric and
            non-finite values are shown as ``"N/A"``.
        evaluated_datasets: Strings shaped like
            ``"<name>: <n> samples, <m> sub-crops"`` (may be empty or None).
        skipped_datasets: Reason strings for skipped datasets (may be empty
            or None).
        output_filename: Path of the PDF to write.
        subcrop_dir: Directory holding the per-crop sub-crop CSV files listed
            in ``crop_name_mapping``.

    Build failures are reported with a printed message rather than raised.
    """
    doc = SimpleDocTemplate(output_filename, pagesize=letter, rightMargin=0.75 * inch, leftMargin=0.75 * inch, topMargin=1 * inch, bottomMargin=1 * inch)

    # Custom Styles
    cover_title_style = ParagraphStyle(
        name='CoverTitle', fontName='Helvetica-Bold', fontSize=20, textColor=DARK_GREEN, alignment=1, spaceAfter=12
    )
    cover_subtitle_style = ParagraphStyle(
        name='CoverSubtitle', fontName='Helvetica', fontSize=12, textColor=BLACK, alignment=1, spaceAfter=10
    )
    heading_style = ParagraphStyle(
        name='Heading2', fontName='Helvetica-Bold', fontSize=14, textColor=DARK_BLUE, spaceBefore=14, spaceAfter=8
    )
    body_style = ParagraphStyle(
        name='BodyText', fontName='Times-Roman', fontSize=10, leading=12, textColor=BLACK, spaceAfter=10, alignment=4, wordWrap='CJK'
    )

    elements = []

    def add_heading(title):
        elements.append(Paragraph(title, heading_style))

    def add_text_section(title, text):
        add_heading(title)
        elements.append(Paragraph(text, body_style))
        elements.append(Spacer(1, 0.2 * inch))

    def add_table(table):
        elements.append(KeepTogether(table))
        elements.append(Spacer(1, 0.3 * inch))

    # Cover Page
    elements.append(Spacer(1, 3 * inch))
    elements.append(Paragraph("Crop Combination Recommendation and Price Prediction", cover_title_style))
    elements.append(Spacer(1, 0.2 * inch))
    elements.append(HRFlowable(width=4 * inch, thickness=1, color=DARK_GREEN, spaceBefore=0, spaceAfter=0, hAlign='CENTER'))
    elements.append(Spacer(1, 0.3 * inch))
    elements.append(Paragraph("CS6611 Creative and Innovative Project", cover_subtitle_style))
    elements.append(Paragraph("Submitted by: [Your Name]", cover_subtitle_style))
    elements.append(Paragraph(f"Date: {datetime.now().strftime('%Y-%m-%d')}", cover_subtitle_style))
    elements.append(Spacer(1, 2.5 * inch))
    elements.append(Paragraph("Department of Computer Science", cover_subtitle_style))
    elements.append(Paragraph("[Your University Name]", cover_subtitle_style))
    elements.append(PageBreak())

    # Introduction
    add_text_section(
        "Introduction",
        "This report, part of the CS6611 Creative and Innovative Project titled 'Crop Combination Recommendation and Price Prediction,' evaluates machine learning models for recommending main crops and their sub-crops. "
        "The main crop model uses a Random Forest Classifier, while the sub-crop model employs a Euclidean Distance-based approach (SubCropRecommender), akin to KNN, to rank sub-crops. "
        "The report compares these models against alternatives, presents detailed performance metrics, and addresses data challenges, particularly for sub-crop datasets, using the Crop Recommendation dataset and sub-crop datasets.",
    )

    # Methodology
    add_text_section(
        "Methodology",
        "The Crop Recommendation dataset (~2200 samples, 22 crops) was preprocessed with LabelEncoder and StandardScaler, split into 80% training and 20% testing sets for main crop prediction. "
        "Models compared include Logistic Regression, KNN, SVM (Linear and RBF), Decision Tree, Random Forest, Naive Bayes, and XGBoost. Metrics for main crop include accuracy, precision, recall, F1-score (macro-averaged), average confidence, low confidence rate (<0.7), and 5-fold cross-validation accuracy. "
        "For sub-crop recommendation, the SubCropRecommender uses Euclidean distance, compared with KNN and Random Forest on sub-crop datasets (30-200 samples) with a 30-sample and 3-sub-crop minimum threshold. "
        "Sub-crop metrics include top-3 accuracy, precision@3, recall@3, F1-score@3, MRR, NDCG@3, hit rate@3, average Euclidean distance, diversity, and coverage.",
    )

    # Data Challenges
    add_text_section(
        "Data Challenges",
        "Main crop prediction used a robust dataset (~2200 samples, 22 crops), ensuring reliable metrics. However, sub-crop recommendation faced significant challenges due to small dataset sizes (30-50 samples for most crops, 200 for Grapes but only 2 sub-crops) and missing files (e.g., Black Gram Dal). "
        "Many datasets were skipped due to insufficient samples (<30), too few unique sub-crops (<3), or file errors, leading to limited evaluation. These issues highlight the need for larger, standardized sub-crop datasets.",
    )

    # Results
    add_text_section(
        "Results",
        "The following sections present model comparisons and performance metrics for main crop prediction and sub-crop recommendation. "
        "Main crop results are robust, while sub-crop results are constrained by data limitations, as detailed in the Sub-Crop Dataset Summary.",
    )

    total_width = doc.width

    # Main Crop Model Comparison
    add_heading("Main Crop Model Comparison")
    comparison_widths = [total_width * 0.6, total_width * 0.4]
    rows = [_header_row(["Model", "Accuracy (%)"])]
    rows += [_body_row([name, f"{acc:.2f}"]) for name, acc in main_crop_comp]
    add_table(_styled_table(rows, comparison_widths))

    # Sub-Crop Model Comparison
    add_heading("Sub-Crop Model Comparison")
    rows = [_header_row(["Model", "Top-3 Accuracy (%)"])]
    rows += [_body_row([name, f"{acc:.2f}"]) for name, acc in subcrop_comp]
    add_table(_styled_table(rows, comparison_widths))

    # Main Crop Performance Metrics
    add_heading("Main Crop Performance Metrics (Random Forest)")
    metric_widths = [total_width * 0.5, total_width * 0.5]
    rows = [_header_row(["Metric", "Value"])]
    for key, value in main_crop_metrics.items():
        value_str = f"{value:.2f}" if isinstance(value, (int, float)) else str(value)
        rows.append(_body_row([key, value_str]))
    add_table(_styled_table(rows, metric_widths))

    # Sub-Crop Performance Metrics
    add_heading("Sub-Crop Performance Metrics (Euclidean Distance)")
    rows = [_header_row(["Metric", "Value"])]
    for key, value in subcrop_metrics.items():
        # inf/NaN mean the metric could not be computed; show N/A instead.
        is_usable = isinstance(value, (int, float)) and np.isfinite(value)
        rows.append(_body_row([key, f"{value:.2f}" if is_usable else "N/A"]))
    add_table(_styled_table(rows, metric_widths))

    # Sub-Crop Dataset Summary
    add_heading("Sub-Crop Dataset Summary")
    summary_widths = [total_width * 0.4, total_width * 0.2, total_width * 0.2, total_width * 0.2]
    rows = [_header_row(["Main Crop", "Samples", "Unique Sub-Crops", "Status"])]
    for main_crop, filename in crop_name_mapping.items():
        samples, sub_crops, status = _summarise_subcrop_file(os.path.join(subcrop_dir, filename))
        rows.append(_body_row([main_crop, samples, sub_crops, status]))
    add_table(_styled_table(rows, summary_widths, body_padding=6, striped=False))

    # Summary
    top_main_crop, top_main_acc = max(main_crop_comp, key=lambda x: x[1], default=("None", 0))
    top_sub_crop, top_sub_acc = max(subcrop_comp, key=lambda x: x[1], default=("None", 0))
    add_text_section(
        "Summary",
        f"The Main Crop Model (Random Forest) excelled in main crop prediction, with {top_main_crop} achieving {top_main_acc:.2f}% accuracy. "
        f"For sub-crop recommendation, {top_sub_crop} led with {top_sub_acc:.2f}% top-3 accuracy, though results were limited by small datasets (30-50 samples) and missing files, as shown in the Sub-Crop Dataset Summary. "
        "The Random Forest model demonstrated robust performance across metrics, while the SubCropRecommender’s Euclidean Distance approach requires enhanced data for reliable evaluation.",
    )

    # Discussion
    add_text_section(
        "Discussion",
        "The Random Forest model and XGBoost outperformed other models in main crop prediction, leveraging ensemble techniques to capture complex feature interactions, with accuracies above 98%. "
        "The SubCropRecommender’s Euclidean Distance approach, akin to KNN, showed potential but was hindered by small datasets, resulting in limited or zero metrics for most crops. "
        "KNN and Random Forest for sub-crops also faced data constraints, emphasizing the need for larger, standardized sub-crop datasets. "
        "Cross-validation and confidence metrics confirm the main crop model’s reliability, while sub-crop metrics like NDCG@3 and diversity highlight ranking quality when data is sufficient.",
    )

    # Conclusion
    add_text_section(
        "Conclusion",
        "This report, part of the CS6611 project, validates the Random Forest model for main crop prediction and evaluates the SubCropRecommender for sub-crop recommendation. "
        "While main crop prediction is highly accurate, sub-crop recommendation requires improved datasets to achieve reliable performance. "
        "Future work will integrate price prediction using market data (e.g., from Agmarknet) and expand sub-crop datasets to enhance agricultural decision-making.",
    )

    # Appendix: Evaluated and Skipped Datasets
    add_heading("Appendix: Dataset Details")

    # Evaluated Datasets Table
    add_heading("Evaluated Sub-Crop Datasets")
    evaluated_widths = [total_width * 0.7, total_width * 0.15, total_width * 0.15]
    rows = [_header_row(["Dataset", "Samples", "Sub-Crops"])]
    if evaluated_datasets:
        rows += [_body_row(_parse_evaluated_entry(entry)) for entry in evaluated_datasets]
    else:
        rows.append(_body_row(["None", "N/A", "N/A"]))
    add_table(_styled_table(rows, evaluated_widths, body_padding=6, striped=False))

    # Skipped Datasets Table
    add_heading("Skipped Sub-Crop Datasets")
    rows = [_header_row(["Reason"])]
    if skipped_datasets:
        rows += [_body_row([reason]) for reason in skipped_datasets]
    else:
        rows.append(_body_row(["None"]))
    # Reasons are free-form messages that often wrap onto several lines, so
    # let ReportLab size these rows instead of forcing a fixed height.
    add_table(_styled_table(rows, [total_width], body_padding=6, striped=False, fixed_row_height=False))

    # Build PDF
    try:
        # The cover uses SimpleDocTemplate's undecorated first-page template;
        # every later page gets the running header/footer.
        doc.build(elements, onLaterPages=header_footer)
        print(f"Report generated: {output_filename}")
    except Exception as e:
        print(f"Error generating PDF: {str(e)}")

# Main Execution
if __name__ == "__main__":
    # Train Main Crop Model
    model, label_encoder, scaler, X_train_scaled, X_test_scaled, y_train, y_test = train_and_save_main_crop_model()
    if model is None:
        print("Failed to train main crop model. Aborting.")
        # ``exit`` is a convenience injected by the ``site`` module for
        # interactive sessions and is not guaranteed to exist; raising
        # SystemExit is the portable way to stop with a non-zero status.
        raise SystemExit(1)

    # Evaluate Main Crop Metrics
    main_crop_metrics = evaluate_main_crop_model(model, X_test_scaled, y_test, label_encoder)

    # Compare Models
    main_crop_comp, subcrop_comp, skipped_datasets = compare_models()

    # Evaluate Sub-Crop Metrics
    recommender = SubCropRecommender(main_model_path='main_crop_model.pkl', subcrop_dir='C:/Projects/Creative & Innovative Project/datasets/sub_crop_data/')
    subcrop_metrics, evaluated_datasets, subcrop_skipped = recommender.calculate_subcrop_accuracy()
    # Report skip reasons from both the comparison run and the recommender.
    skipped_datasets.extend(subcrop_skipped)

    # Generate PDF
    generate_pdf_report(main_crop_comp, subcrop_comp, main_crop_metrics, subcrop_metrics, evaluated_datasets, skipped_datasets)