In [None]:
import pandas as pd
import numpy as np

# Load the raw altitude dataset for a first look.
dataset = pd.read_csv('./Rwanda_altitude/rwanda_altitude.csv')

# Only the last expression of a notebook cell is rendered, so the
# intermediate summaries must be printed explicitly or they are discarded.
print(dataset.describe())
dataset.info()  # info() writes its report to stdout itself and returns None
print(dataset.head())
dataset.columns

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib
import os

# Create directory for saving models if it doesn't exist
os.makedirs('./models', exist_ok=True)

# Load the dataset
dataset = pd.read_csv('./Rwanda_altitude/rwanda_altitude.csv')

# Display basic information about the dataset
print("Dataset Information:")
# DataFrame.info() prints its own report and returns None; wrapping it in
# print() would emit a stray "None" line after the report.
dataset.info()
print("\nDataset Description:")
print(dataset.describe())
print("\nFirst 5 rows:")
print(dataset.head())
print("\nColumns:")
print(dataset.columns)

# Data cleaning
def clean_dataset(df):
    """Report missing values, drop duplicate rows and add a combined key column.

    Args:
        df: DataFrame containing at least the 'District' and 'Sector' columns
            (concatenated with '_' below, so they are expected to hold strings).

    Returns:
        A new DataFrame without duplicate rows and with an additional
        'district_sector' column of the form "<District>_<Sector>".
        The input frame is not modified.
    """
    # Check for missing values
    print("\nMissing values:")
    print(df.isnull().sum())

    # Drop duplicates if any
    deduplicated = df.drop_duplicates()
    print(f"\nRemoved {len(df) - len(deduplicated)} duplicate rows")

    # Create a combined feature for district+sector.
    # assign() returns a fresh frame instead of writing a column into the
    # result of drop_duplicates(), which would raise SettingWithCopyWarning
    # whenever rows were actually removed.
    return deduplicated.assign(
        district_sector=deduplicated['District'] + '_' + deduplicated['Sector']
    )

cleaned_data = clean_dataset(dataset)

# Create and fit label encoders
district_encoder = LabelEncoder()
sector_encoder = LabelEncoder()
altitude_encoder = LabelEncoder()

# Fit the encoders on the full dataset
district_encoder.fit(cleaned_data['District'])
sector_encoder.fit(cleaned_data['Sector'])
altitude_encoder.fit(cleaned_data['Altitude_Level'])

# Transform the data.
# assign() produces a new frame rather than writing columns into the frame
# returned by the cleaning step, which avoids pandas' SettingWithCopyWarning.
cleaned_data = cleaned_data.assign(
    district_encoded=district_encoder.transform(cleaned_data['District']),
    sector_encoded=sector_encoder.transform(cleaned_data['Sector']),
    altitude_level_encoded=altitude_encoder.transform(cleaned_data['Altitude_Level']),
)

# Map encoded altitude levels back to original values for reference
# (LabelEncoder codes are the positions of the labels in classes_).
altitude_mapping = dict(enumerate(altitude_encoder.classes_))
print("\nAltitude level mapping:")
print(altitude_mapping)

# Feature and target selection
X = cleaned_data[['district_encoded', 'sector_encoded']]
y = cleaned_data['altitude_level_encoded']

# Hold out 20% of the rows for evaluation (fixed seed keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)
print("\nModel Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Persist the model together with everything needed to use it later
artifacts = {
    './models/rwanda_altitude_model.joblib': model,
    './models/district_encoder.joblib': district_encoder,
    './models/sector_encoder.joblib': sector_encoder,
    './models/altitude_encoder.joblib': altitude_encoder,
    './models/altitude_mapping.joblib': altitude_mapping,
    './models/cleaned_data.joblib': cleaned_data,  # kept for reference lookups
}
for artifact_path, artifact in artifacts.items():
    joblib.dump(artifact, artifact_path)

print("\nModel and all components successfully saved to './models/' directory")

# Test predict function
def predict_altitude(district_name, sector_name):
    """Predict the altitude level for a district/sector pair using the in-memory model.

    Relies on the notebook-level objects `district_encoder`, `sector_encoder`,
    `cleaned_data`, `model` and `altitude_mapping` created in this cell.

    Args:
        district_name: District label exactly as it appears in the dataset.
        sector_name: Sector label; must belong to `district_name` in the dataset.

    Returns:
        A human-readable string: either the predicted altitude level or a
        message explaining which of the two names was not found.
    """
    # Check if district and sector exist in our data
    if district_name not in district_encoder.classes_:
        return f"District '{district_name}' not found in the dataset"

    district_sectors = cleaned_data[cleaned_data['District'] == district_name]['Sector'].values
    if sector_name not in district_sectors:
        return f"Sector '{sector_name}' not found in district '{district_name}'"

    # Encode the input using the encoders
    district_enc = district_encoder.transform([district_name])[0]
    sector_enc = sector_encoder.transform([sector_name])[0]

    # The model was fitted on a DataFrame with these two column names, so the
    # query row is given the same names; a bare nested list makes scikit-learn
    # warn that X does not have valid feature names.
    features = pd.DataFrame(
        [[district_enc, sector_enc]],
        columns=['district_encoded', 'sector_encoded'],
    )

    # Make prediction
    prediction = model.predict(features)[0]

    # Map back to original altitude level
    altitude_level = altitude_mapping[prediction]

    return f"The predicted altitude level for {sector_name} in {district_name} is: {altitude_level}"

# Smoke-test the predictor on two known district/sector pairs
print("\nPrediction Examples:")
try:
    for example_district, example_sector in (("Kicukiro", "Gatenga"), ("Rwamagana", "Rubona")):
        print(predict_altitude(example_district, example_sector))
except Exception as e:
    print(f"Error during prediction: {e}")

In [17]:
import joblib
import sys

def load_model_components():
    """Read the persisted model, both encoders and the altitude mapping from ./models.

    Returns:
        A 4-tuple (model, district_encoder, sector_encoder, altitude_mapping).
        If any of the files is missing, an explanation is printed and a tuple
        of four None values is returned instead.
    """
    # NOTE: joblib files are pickle-based; only load artifacts you produced yourself.
    artifact_paths = (
        './models/rwanda_altitude_model.joblib',
        './models/district_encoder.joblib',
        './models/sector_encoder.joblib',
        './models/altitude_mapping.joblib',
    )
    try:
        components = tuple(joblib.load(path) for path in artifact_paths)
    except FileNotFoundError as e:
        print(f"Error: Required model files not found. {e}")
        print("Please ensure you've run the training script first to generate model files.")
        return None, None, None, None
    return components

def predict_altitude(district_name, sector_name):
    """Predict altitude level for a given district and sector.

    The model, encoders and altitude mapping are loaded from disk via
    `load_model_components()` on every call.

    Args:
        district_name: District label; must be one of the district encoder's classes.
        sector_name: Sector label; must be one of the sector encoder's classes.

    Returns:
        A human-readable string: the predicted altitude level, or a message
        describing why no prediction could be made. Errors are reported in the
        returned string rather than raised.

    NOTE(review): this only checks that the sector exists somewhere in the
    dataset, not that it belongs to `district_name`.
    """
    import pandas as pd  # local import: this cell does not import pandas itself

    # Load the model and encoders
    model, district_encoder, sector_encoder, altitude_mapping = load_model_components()

    if model is None:
        return "Failed to load model components"

    # Check if district exists in our encoded data
    try:
        if district_name not in district_encoder.classes_:
            return f"District '{district_name}' not found in the dataset"

        # Encode the input
        district_enc = district_encoder.transform([district_name])[0]

        # Check if sector exists and encode it
        try:
            sector_enc = sector_encoder.transform([sector_name])[0]
        except ValueError:
            return f"Sector '{sector_name}' not found in the dataset"

        # When the estimator was fitted on a DataFrame it records the column
        # names in feature_names_in_; pass the query row with the same names so
        # scikit-learn does not warn about missing feature names. Estimators
        # fitted on plain arrays lack the attribute and get a plain list.
        feature_names = getattr(model, 'feature_names_in_', None)
        if feature_names is not None:
            features = pd.DataFrame([[district_enc, sector_enc]], columns=list(feature_names))
        else:
            features = [[district_enc, sector_enc]]

        # Make prediction
        prediction = model.predict(features)[0]

        # Map back to original altitude level
        altitude_level = altitude_mapping[prediction]

        return f"The predicted altitude level for {sector_name} in {district_name} is: {altitude_level}"

    except Exception as e:
        return f"Error during prediction: {str(e)}"

def list_available_districts():
    """Return the district labels known to the saved district encoder, sorted.

    An empty list is returned when the model components cannot be loaded.
    """
    components = load_model_components()
    encoder = components[1]  # second element is the district encoder
    return [] if encoder is None else sorted(encoder.classes_)

def list_sectors_in_district(district_name):
    """List all sectors in a given district.

    Args:
        district_name: District label exactly as stored in the cleaned dataset.

    Returns:
        Sorted list of the unique sector names recorded for the district.
        An empty list is returned (with a printed explanation) when the model
        components or the cleaned dataset cannot be loaded, or when the
        district does not occur in the dataset.
    """
    # Only the district encoder is inspected here; it signals whether the
    # training artifacts are available at all.
    _, district_encoder, _, _ = load_model_components()

    if district_encoder is None:
        return []

    # Load the original dataset to get sectors for a district
    try:
        dataset = joblib.load('./models/cleaned_data.joblib')
        in_district = dataset['District'] == district_name
        if not in_district.any():
            print(f"District '{district_name}' not found")
            return []
        return sorted(dataset.loc[in_district, 'Sector'].unique())
    except Exception as e:
        # Best-effort lookup: report the cause instead of hiding it, and do not
        # intercept KeyboardInterrupt/SystemExit as a bare `except:` would.
        print("Could not load sector information. Sectors list unavailable.")
        print(f"Reason: {e}")
        return []




# Ask for a district/sector pair and predict only when both are valid.
# Surrounding whitespace is stripped so "Gasabo " still matches "Gasabo".
district_name = input("Enter district name: ").strip()
sector_name = input("Enter sector name: ").strip()

all_districts = list_available_districts()
all_sectors = []
predicted_altitude = None

if district_name not in all_districts:
    print(f"District '{district_name}' is not available.")
else:
    all_sectors = list_sectors_in_district(district_name)
    if sector_name not in all_sectors:
        print(f"Sector '{sector_name}' is not available in district '{district_name}'.")
    else:
        # Predict only after validation passed; otherwise a prediction could be
        # printed right after the input was reported as unavailable.
        predicted_altitude = predict_altitude(district_name, sector_name)
        print(predicted_altitude)
     


The predicted altitude level for Gisozi in Gasabo is: middle




In [18]:
def interactive_prediction():
    """Interactive mode for predicting altitude level.

    Lists the available districts, then repeatedly prompts for a district and
    a sector and prints the result of `predict_altitude` until the user types
    'exit'. District names are matched case-insensitively and surrounding
    whitespace in the input is ignored. Returns None.
    """
    print("Rwanda Altitude Prediction - Interactive Mode")
    print("--------------------------------------------")

    districts = list_available_districts()
    if not districts:
        print("No district data available. Make sure you've trained the model first.")
        return

    print("\nAvailable districts:")
    for district in districts:
        print(f"- {district}")

    # Case-insensitive lookup from what the user typed to the exact spelling
    # the encoder knows, so "gasabo" resolves to "Gasabo".
    canonical_names = {str(name).casefold(): str(name) for name in districts}

    while True:
        print("\n")
        district = input("Enter district name (or 'exit' to quit): ").strip()

        if district.lower() == 'exit':
            break

        matched_district = canonical_names.get(district.casefold())
        if matched_district is None:
            print(f"District '{district}' not found. Please choose from the available districts.")
            continue

        sector = input("Enter sector name: ").strip()
        result = predict_altitude(matched_district, sector)
        print(result)

# These functions are accessible directly without using command-line arguments,
# making them Jupyter-friendly

# Example predictions to demonstrate usage
print("Example predictions:")
print(predict_altitude("Kicukiro", "Gatenga"))
print(predict_altitude("Rwamagana", "Rubona"))

print("\nAvailable districts:")
districts = list_available_districts()
preview_count = 5  # Just show the first few to avoid clutter
for district in districts[:preview_count]:
    print(f"- {district}")

# Mention the remainder only when there is one; with fewer than
# `preview_count` districts (e.g. model files missing) the subtraction
# would otherwise report a negative count.
remaining_count = len(districts) - preview_count
if remaining_count > 0:
    print(f"...and {remaining_count} more districts")



Example predictions:
The predicted altitude level for Gatenga in Kicukiro is: high
The predicted altitude level for Rubona in Rwamagana is: middle

Available districts:
- Bugesera
- Burera
- Gakenke
- Gasabo
- Gatsibo
...and 25 more districts


