# Final ML Model Deploy v3.0 - Test the Current RF Model

### Predicting - Highest Income / Harvest Month / Cultivation Month / Neighbor Locations / Best Location

In [12]:
import joblib
import pandas as pd
import datetime

# Load the Random Forest model, encoder, and scalers
rf_model_filename = './fun1/rf_model.pkl'
encoder_filename = './fun1/encoder.pkl'
scaler_X_filename = './fun1/scaler_X.pkl'
scaler_y_filename = './fun1/scaler_y.pkl'

rf_model = joblib.load(rf_model_filename)
encoder = joblib.load(encoder_filename)
scaler_X = joblib.load(scaler_X_filename)
scaler_y = joblib.load(scaler_y_filename)

# Load the neighbor locations mapping
neighbor_mapping_file = './fun1/location_list_mapping.csv'
neighbor_mapping = pd.read_csv(neighbor_mapping_file, index_col='locations')

# Function to get price predictions
def predict_price(year, month, location):
    # Create a DataFrame with the user input
    input_data = pd.DataFrame({'year': [year], 'month': [month], 'location': [location]})

    # Encode location using the loaded encoder
    location_encoded = encoder.transform(input_data[['location']])
    location_encoded_df = pd.DataFrame(location_encoded, columns=encoder.get_feature_names_out(['location']))
    location_encoded_df.columns = location_encoded_df.columns.str.split('_').str[-1]

    # Drop the 'location' column and concatenate the encoded 'location' columns
    input_data.drop(columns=['location'], inplace=True)
    input_data = pd.concat([input_data, location_encoded_df], axis=1)

    # Standardize the input data for independent variables (X)
    input_data_x = pd.DataFrame(scaler_X.transform(input_data), columns=input_data.columns)

    # Make price predictions using the Random Forest model
    price_predictions_y = rf_model.predict(input_data_x)

    # Inverse transform the predicted values for dependent variables (y) to get them back to their original scale
    price_predictions = scaler_y.inverse_transform(price_predictions_y)

    return price_predictions[0][0], price_predictions[0][1]  # Return both Wholesale and Retail prices

# Function to calculate the next 12 months
def get_upcoming_months(year, month, num_months):
    upcoming_months = []
    for _ in range(num_months):
        if month == 12:
            month = 1
            year += 1
        else:
            month += 1
        upcoming_months.append((year, month))
    return upcoming_months

# User inputs
current_date = datetime.date.today()
current_year = current_date.year
current_month = current_date.month

## User input for location and predictions duration
location = 'Colombo'
num_months = 6

## User input for expected harvest amount (in kg) and retail ratio
expected_harvest_amount = 50
retail_ratio = 1.0  # Retail ratio of 60%, wholesale is 40%

## User input for harvesting duration
harvest_duration = 7  # Assuming 7 weeks for harvesting duration

# Initialize variables to track highest income
highest_income = 0
highest_income_month = None
selected_location_income = 0

# Get predictions for the upcoming months and calculate total predicted income
for year, month in get_upcoming_months(current_year, current_month, num_months):
    wholesale_price, retail_price = predict_price(year, month, location)
    
    # Calculate total predicted income based on expected harvest amount and retail ratio
    total_income = (wholesale_price * (1 - retail_ratio) + retail_price * retail_ratio) * expected_harvest_amount
    
    # Check if the current month has higher income
    if total_income > highest_income:
        highest_income = total_income
        highest_income_month = (year, month)
        selected_location_income = total_income  # Save income for the selected location

# Calculate the start month for cultivation based on harvesting duration
if highest_income_month:
    harvest_year, harvest_month = highest_income_month
    start_date = datetime.date(harvest_year, harvest_month, 1) - datetime.timedelta(weeks=harvest_duration)
    start_month = start_date.month
    start_year = start_date.year
    
    highest_income_str = f"{highest_income:.2f}"
    highest_income_month_str = f"{datetime.date(harvest_year, harvest_month, 1).strftime('%B - %Y')}"
    cultivation_start_str = f"{datetime.date(start_year, start_month, 1).strftime('%B - %Y')}"

    print(f"{location} - Highest Income Rs: {highest_income_str}")
    
    print(f"\nWholesale Price: {wholesale_price:.2f}")
    print(f"Retail Price: {retail_price:.2f}")
    
    print(f"\nHarvest Month: {highest_income_month_str}")
    print(f"Start Cultivation before the end of: {cultivation_start_str}")

    # Get neighboring locations and their predicted incomes
    neighbors = neighbor_mapping.loc[location].dropna().tolist()
    neighbor_incomes = []

    for neighbor in neighbors:
        neighbor_wholesale_price, neighbor_retail_price = predict_price(harvest_year, harvest_month, neighbor)
        neighbor_income = (neighbor_wholesale_price * (1 - retail_ratio) + neighbor_retail_price * retail_ratio) * expected_harvest_amount
        neighbor_incomes.append((neighbor, neighbor_income))

    # Sort neighbors by income (from high to low)
    sorted_neighbors = sorted(neighbor_incomes, key=lambda x: x[1], reverse=True)

    print("\nNeighbor Locations - Highest Income Rs:")
    for neighbor, income in sorted_neighbors:
        print(f"{neighbor} - {income:.2f}")

    # Identify the location with the highest income among neighbors
    best_neighbor = sorted_neighbors[0][0]
    best_neighbor_income = sorted_neighbors[0][1]

    print(f"\nBest Neighbor Location - Highest Income Rs:")
    print(f"{best_neighbor} - {best_neighbor_income:.2f}")

    # Compare the selected location with the best neighbor
    if selected_location_income > best_neighbor_income:
        print("\nThe selected location has the highest income.")
    elif selected_location_income < best_neighbor_income:
        print(f"\nThe best neighbor location ({best_neighbor}) has the highest income.")
    else:
        print("\nThe selected location and the best neighbor location have the same income.")

Colombo - Highest Income Rs: 9577.92

Wholesale Price: 97.73
Retail Price: 188.20

Harvest Month: June - 2024
Start Cultivation before the end of: April - 2024

Neighbor Locations - Highest Income Rs:
Gampaha - 10166.58
Kaluthara - 9668.96
Meegoda - 9142.20
Veyangoda - 8878.39

Best Neighbor Location - Highest Income Rs:
Gampaha - 10166.58

The best neighbor location (Gampaha) has the highest income.


------------------

**You can use this list of locations in the above script for predictions**

In [None]:
locations = [
    'Galle',
    'Thissamaharama',
    'Rathnapura',
    'Gampaha',
    'Bandarawela',
    'Embilipitiya',
    'Veyangoda',
    'Meegoda',
    'Kurunegala',
    'Keppetipola',
    'Dehiattakandiya',
    'Hambanthota',
    'Jaffna',
    'Polonnaruwa',
    'Nikaweratiya',
    'Trinco',
    'Kaluthara',
    'Badulla',
    'Anuradapuraya',
    'Vavuniya',
    'Matale',
    'Mannar',
    'Dabulla',
    'Mullathivu',
    'Kandy',
    'Matara',
    'Thabuththegama',
    'Nuwara Eliya',
    'Ampara',
    'Monaragala',
    'Colombo',
    'Hanguranketha',
    'Puttalam',
    'Batticaloa',
    'Kegalle',
    'Galenbidunuwewa',
    'Kilinochchi'
]