In [1]:
1+1

2

In [None]:
import random
import os
import gc
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image as PImage
from fastai.vision.all import *
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [None]:
# Set random seeds for reproducibility.
# `random.seed` only covers Python's built-in RNG; fastai's `set_seed` also
# seeds NumPy and PyTorch, which the data split, augmentation and model
# initialisation draw from.
random.seed(3)
set_seed(3)

In [None]:
# Set random seeds for reproducibility.
# `random.seed` only covers Python's built-in RNG; fastai's `set_seed` also
# seeds NumPy and PyTorch, which the data split, augmentation and model
# initialisation draw from.
random.seed(3)
set_seed(3)

# Authenticate and Mount Google Drive
from google.colab import auth
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from oauth2client.client import GoogleCredentials
from google.colab import drive

def mount_drive():
    """Mount Google Drive at ``/content/gdrive`` in a Colab runtime.

    The Colab ``drive`` module is imported locally under an alias so it cannot
    be shadowed by a PyDrive ``GoogleDrive`` client, which has no ``mount``
    method. ``drive.mount`` performs its own authorisation, so no separate
    PyDrive/OAuth client is created here.

    Failures (including running outside Colab) are printed instead of raised,
    so the rest of the notebook keeps running.
    """
    try:
        from google.colab import drive as colab_drive

        colab_drive.mount('/content/gdrive')
        print("Google Drive mounted successfully.")
    except Exception as e:
        print(f"Error mounting Google Drive: {e}")

# Attempt the Drive mount at /content/gdrive; failures are printed, not raised.
mount_drive()

In [None]:
import random
import os
import gc
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image as PImage
from fastai.vision.all import *
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Set random seeds for reproducibility.
# `random.seed` only covers Python's built-in RNG; fastai's `set_seed` also
# seeds NumPy and PyTorch, which the data split, augmentation and model
# initialisation draw from.
random.seed(3)
set_seed(3)

# Authenticate and Mount Google Drive
from google.colab import auth
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from oauth2client.client import GoogleCredentials
from google.colab import drive

def mount_drive():
    """Mount Google Drive at ``/content/gdrive`` in a Colab runtime.

    The Colab ``drive`` module is imported locally under an alias so it cannot
    be shadowed by a PyDrive ``GoogleDrive`` client, which has no ``mount``
    method. ``drive.mount`` performs its own authorisation, so no separate
    PyDrive/OAuth client is created here.

    Failures (including running outside Colab) are printed instead of raised,
    so the rest of the notebook keeps running.
    """
    try:
        from google.colab import drive as colab_drive

        colab_drive.mount('/content/gdrive')
        print("Google Drive mounted successfully.")
    except Exception as e:
        print(f"Error mounting Google Drive: {e}")

# Attempt the Drive mount at /content/gdrive; the archives unzipped below are read from there.
mount_drive()

# Unzip Files
def unzip_files(archives=None, dest='.'):
    """Extract the competition archives into ``dest``.

    Uses :mod:`zipfile` instead of ``!unzip``. A shell command that fails
    inside a notebook does not raise a Python exception, so the previous
    ``try/except`` could never fire and the success message was printed even
    when an archive was missing. Existing files in ``dest`` are overwritten.

    Args:
        archives: Iterable of zip file paths. Defaults to the three CGIAR
            archives stored under the mounted Drive folder.
        dest: Directory to extract into. Defaults to the current working
            directory, which is where ``unzip`` extracted to.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    import zipfile

    if archives is None:
        archives = (
            "/content/gdrive/My Drive/CGIAR_Wheat_SampleSubmission.zip",
            "/content/gdrive/My Drive/CGIAR_Wheat_Train_Dataset.zip",
            "/content/gdrive/My Drive/Images.zip",
        )
    try:
        for archive in archives:
            with zipfile.ZipFile(archive) as zf:
                zf.extractall(dest)
        print("Files unzipped successfully.")
    except (OSError, zipfile.BadZipFile) as e:
        print(f"Error unzipping files: {e}")

# Extract the archives before any CSV or image is read; later steps use relative paths such as 'Train.csv'.
unzip_files()

# Load Training Data
def load_data(csv_path='Train.csv'):
    """Load the training table and keep only rows with ``label_quality == 2``.

    Args:
        csv_path: Path of the training CSV. Defaults to ``'Train.csv'`` in the
            current working directory.

    Returns:
        An independent copy of the filtered DataFrame, so later column
        assignments cannot trigger chained-assignment warnings against the
        full table. Returns ``None`` if reading or filtering fails; the error
        is printed rather than raised.
    """
    try:
        train = pd.read_csv(csv_path)
        train_hq = train[train['label_quality'] == 2].copy()
        print(f"Number of high-quality images: {train_hq.shape[0]}")
        print(f"Growth stages in high-quality data: {train_hq['growth_stage'].unique()}")
        return train_hq
    except Exception as e:
        print(f"Error loading data: {e}")
        return None

# train_hq is None if loading failed (load_data prints the error instead of raising).
train_hq = load_data()

# Exploratory Data Analysis
def eda(train_hq):
    """Draw a count plot of the ``growth_stage`` column of ``train_hq``.

    The figure is 10x6 inches and is shown immediately. Any exception is
    printed with an "Error performing EDA" prefix instead of being raised.
    """
    try:
        plt.figure(figsize=(10, 6))
        # countplot draws on the current axes and hands them back.
        ax = sns.countplot(x=train_hq['growth_stage'])
        ax.set(
            title="Distribution of Growth Stages in High-Quality Data",
            xlabel="Growth Stage",
            ylabel="Count",
        )
        plt.show()
    except Exception as err:
        print(f"Error performing EDA: {err}")

# Plot the growth-stage distribution of the filtered training data.
eda(train_hq)

# Image Transformations
def get_aug_transforms():
    """Build the augmentation pipeline and pass it through ``setup_aug_tfms``.

    Returns:
        Whatever ``setup_aug_tfms`` returns for the transform list below, or
        ``None`` if construction fails (the error is printed).
    """
    try:
        pipeline = [
            FlipItem(p=0.5),
            Rotate(max_deg=25, p=0.6),
            RandomResizedCrop(p=1., size=256),
            Brightness(max_lighting=0.9, p=0.6),
            Contrast(max_lighting=0.9, p=0.6),
            Zoom(max_zoom=1.3, p=0.6),
            Warp(magnitude=0.3, p=0.3),
        ]
        return setup_aug_tfms(pipeline)
    except Exception as err:
        print(f"Error setting up augmentations: {err}")
        return None

# NOTE(review): this module-level name presumably shadows fastai's own `aug_transforms`
# helper pulled in by the star import — confirm nothing later needs the original.
aug_transforms = get_aug_transforms()

# Create Data Loaders
def create_dataloaders(train_hq, aug_transforms, seed=3):
    """Build regression ``ImageDataLoaders`` from the training DataFrame.

    Images are looked up under ``Images/`` with a ``.jpeg`` suffix, resized to
    224 per item, and labelled with the ``growth_stage`` column.

    Args:
        train_hq: DataFrame of training rows.
        aug_transforms: Batch transforms to apply.
        seed: Seed forwarded to ``from_df`` so the random train/validation
            split is the same on every run. ``random.seed`` alone does not
            control that split. Pass ``None`` for an unseeded split.

    Returns:
        The ``DataLoaders`` object, or ``None`` on failure (the error is
        printed).
    """
    try:
        dls = ImageDataLoaders.from_df(
            train_hq,  # Use high-quality dataset
            path='Images/',
            suff='.jpeg',
            label_col='growth_stage',
            seed=seed,
            item_tfms=Resize(224),
            y_block=RegressionBlock(),
            batch_tfms=aug_transforms
        )
        dls.show_batch()
        return dls
    except Exception as e:
        print(f"Error creating dataloaders: {e}")
        return None

# dls is None if the loaders could not be built (the error is printed by create_dataloaders).
dls = create_dataloaders(train_hq, aug_transforms)

# Train the Model
def train_model(dls, model_arch, metrics):
    """Create a pretrained fp16 learner and run the two training phases.

    Phase one is ``fine_tune(5)`` followed by a loss plot. Phase two
    unfreezes, runs ``lr_find`` (its result is not used), then trains 40
    one-cycle epochs with ``lr_max=slice(1e-7, 1e-5)``.

    Returns:
        The trained learner, or ``None`` if any step raises (the error is
        printed).
    """
    try:
        learner = cnn_learner(dls, model_arch, metrics=metrics, pretrained=True)
        learner = learner.to_fp16()

        # Phase 1: transfer learning.
        learner.fine_tune(5)
        learner.recorder.plot_loss()

        # Phase 2: long low-learning-rate pass over all layers.
        learner.unfreeze()
        learner.lr_find()
        lr_range = slice(1e-7, 1e-5)
        learner.fit_one_cycle(40, lr_max=lr_range)
        return learner
    except Exception as err:
        print(f"Error training model: {err}")
        return None

# Metrics reported during training; these names are expected to come from the fastai star import.
metrics = [rmse, mae, mse, R2Score()]

# Pass the architecture callable, not the string 'resnet50'. A bare string is
# only resolved by newer fastai releases with timm installed; the callable is
# accepted by every fastai v2 learner factory.
learn = train_model(dls, resnet50, metrics)

# Make Test Predictions
def make_predictions(learn, dls):
    """Run test-time augmentation over the rows of ``SampleSubmission.csv``.

    Args:
        learn: Learner used to build the test loader and predict.
        dls: Accepted for call compatibility; not used by this function.

    Returns:
        The value returned by ``learn.tta``, or ``None`` on failure (the
        error is printed).
    """
    try:
        sample_df = pd.read_csv('SampleSubmission.csv')
        test_loader = learn.dls.test_dl(sample_df)
        return learn.tta(dl=test_loader)
    except Exception as err:
        print(f"Error making predictions: {err}")
        return None

# preds is None if prediction failed (make_predictions prints the error instead of raising).
preds = make_predictions(learn, dls)

# Create Submission
def create_submission(preds):
    """Fill the sample submission's ``growth_stage`` column with predictions.

    Args:
        preds: Indexable whose first element exposes ``.numpy()``; that array
            becomes the ``growth_stage`` column.

    Returns:
        The DataFrame read from ``SampleSubmission.csv`` with the column
        replaced, or ``None`` on failure (the error is printed).
    """
    try:
        submission_df = pd.read_csv('SampleSubmission.csv')
        predicted = preds[0].numpy()
        submission_df['growth_stage'] = predicted
        return submission_df
    except Exception as err:
        print(f"Error creating submission: {err}")
        return None

# submission is None if building it failed (create_submission prints the error instead of raising).
submission = create_submission(preds)

# Save Submission
def save_submission(submission, filename='submission.csv'):
    """Write the ``UID`` and ``growth_stage`` columns of ``submission`` to CSV.

    The index is not written. Success and failure are both reported on
    stdout; nothing is raised or returned.
    """
    export_cols = ['UID', 'growth_stage']
    try:
        submission[export_cols].to_csv(filename, index=False)
        print(f"Submission saved as {filename}")
    except Exception as err:
        print(f"Error saving submission: {err}")

# Drive is mounted at /content/gdrive, so the submission must be written under that prefix.
save_submission(submission, '/content/gdrive/My Drive/submission.csv')

# Enhanced Visualization
def visualize_results(learn, dls):
    """Plot a 2x2 diagnostic figure for the model on the validation set.

    Panels: predictions vs. actuals with an identity line, the error
    histogram, the growth-stage distribution of the training data, and a
    regression fit of predicted on actual values. RMSE, MAE and R² are
    written onto the first panel.

    Args:
        learn: Trained learner used to compute predictions.
        dls: DataLoaders whose ``valid`` loader is evaluated.

    Notes:
        Reads the module-level ``train_hq`` DataFrame for the distribution
        panel. Any exception is printed rather than raised.
    """
    try:
        # get_preds expects a single DataLoader, so evaluate the validation loader.
        preds, targs = learn.get_preds(dl=dls.valid)

        rmse_val = rmse(preds, targs)
        mae_val = mae(preds, targs)

        # Flatten to 1-D so (n, 1) predictions do not broadcast against (n,)
        # targets when subtracting or plotting.
        pred_np = preds.numpy().ravel()
        targ_np = targs.numpy().ravel()

        # scikit-learn's signature is r2_score(y_true, y_pred).
        r2_val = r2_score(targ_np, pred_np)

        fig, axes = plt.subplots(2, 2, figsize=(15, 12))

        # Plot 1: Predictions vs Actuals
        sns.scatterplot(x=targ_np, y=pred_np, ax=axes[0, 0])
        axes[0, 0].set_title('Predictions vs Actuals')
        axes[0, 0].set_xlabel('Actual Growth Stage')
        axes[0, 0].set_ylabel('Predicted Growth Stage')
        lo, hi = targ_np.min(), targ_np.max()
        axes[0, 0].plot([lo, hi], [lo, hi], 'r--')

        # Plot 2: Error Distribution
        errors = pred_np - targ_np
        sns.histplot(errors, ax=axes[0, 1])
        axes[0, 1].set_title('Error Distribution')
        axes[0, 1].set_xlabel('Error (Predicted - Actual)')
        axes[0, 1].set_ylabel('Frequency')

        # Plot 3: Growth Stage Distribution (uses the module-level train_hq)
        sns.countplot(x=train_hq['growth_stage'], ax=axes[1, 0])
        axes[1, 0].set_title('Growth Stage Distribution')
        axes[1, 0].set_xlabel('Growth Stage')
        axes[1, 0].set_ylabel('Count')

        # Plot 4: Actual vs Predicted with Regression Line
        sns.regplot(x=targ_np, y=pred_np, ax=axes[1, 1])
        axes[1, 1].set_title('Actual vs Predicted with Regression Line')
        axes[1, 1].set_xlabel('Actual Growth Stage')
        axes[1, 1].set_ylabel('Predicted Growth Stage')

        # Annotate the first panel; \u00b2 is the superscript two in "R²".
        axes[0, 0].text(
            0.7, 0.9,
            f'RMSE: {rmse_val:.4f}\nMAE: {mae_val:.4f}\nR\u00b2: {r2_val:.4f}',
            transform=axes[0, 0].transAxes,
        )

        # Layout so plots do not overlap
        plt.tight_layout()

        plt.show()
    except Exception as e:
        print(f"Error visualizing results: {e}")

# Render the diagnostic plots; any failure is printed by visualize_results rather than raised.
visualize_results(learn, dls)
