#### Data preprocessing 

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Step 1.1: Load the dataset
data = pd.read_excel('district_risk_data_reorganized_rice.xlsx')

# Step 1.2: Handle Missing Data
data.fillna(method='ffill', inplace=True)  # Forward fill
data.fillna(method='bfill', inplace=True)  # Backward fill

# Step 1.3: Feature Engineering
reduction_factor = 0.2  # 20% reduction in yield for disaster years
data['Adjusted_Yield'] = data.apply(
    lambda row: row['Yield'] * (1 - reduction_factor) if row['Natural_Disaster_Risk'] == 1 else row['Yield'],
    axis=1
)

data['Overall_Risk'] = (
    data['Natural_Disaster_Risk'] +
    data['Temperature_Risk'] +
    data['Economic_Political_Risk']
) / 3

# Step 1.4: Normalize/Standardize Data
numerical_columns = ['Area', 'Production', 'Yield', 'Temperature', 'Overall_Risk']
scaler = MinMaxScaler()
data[numerical_columns] = scaler.fit_transform(data[numerical_columns])

# Step 1.5: Transform Categorical Data (if applicable)
data = pd.get_dummies(data, columns=['District'], drop_first=True)

# Save the processed data to a new Excel file
data.to_excel('processed_rice_data.xlsx', index=False)

# Print a summary
print("Data preprocessing complete!")
print(f"Processed data shape: {data.shape}")



Data preprocessing complete!
Processed data shape: (176, 25)


  data.fillna(method='ffill', inplace=True)  # Forward fill
  data.fillna(method='bfill', inplace=True)  # Backward fill


#### Step 2: Splitting Data for Model Training


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Step 2.1: Load the processed data
data = pd.read_excel('processed_rice_data.xlsx')

# Step 2.2: Define Features (X) and Target (y)
# Features include risk factors and other predictors
X = data[['Natural_Disaster_Risk', 'Temperature_Risk', 'Economic_Political_Risk', 
          'Area', 'Production', 'Temperature', 'Overall_Risk']]  # Adjust as needed
y = data['Adjusted_Yield']  # Target

# Step 2.3: Split Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Print the shapes of the resulting datasets
print(f"Training Features Shape: {X_train.shape}")
print(f"Testing Features Shape: {X_test.shape}")
print(f"Training Target Shape: {y_train.shape}")
print(f"Testing Target Shape: {y_test.shape}")


Training Features Shape: (140, 7)
Testing Features Shape: (36, 7)
Training Target Shape: (140,)
Testing Target Shape: (36,)


In [3]:
X_train

Unnamed: 0,Natural_Disaster_Risk,Temperature_Risk,Economic_Political_Risk,Area,Production,Temperature,Overall_Risk
156,0,0,0.000000,0.541942,0.575213,0.243771,0.000000
136,0,1,0.000000,0.444528,0.441519,0.799099,0.293050
98,1,0,0.000000,0.358088,0.184282,0.644770,0.293050
157,0,0,0.000000,0.575015,0.575213,0.280283,0.000000
38,0,0,0.000000,0.384997,0.400022,0.619171,0.000000
...,...,...,...,...,...,...,...
71,1,1,1.317756,0.585388,0.215405,0.983167,0.972269
106,0,0,1.306665,0.385749,0.409616,0.305082,0.382918
14,0,0,0.000000,0.384095,0.596129,0.274787,0.000000
92,0,0,0.000000,0.473391,0.528920,0.699468,0.000000


In [4]:
X_test

Unnamed: 0,Natural_Disaster_Risk,Temperature_Risk,Economic_Political_Risk,Area,Production,Temperature,Overall_Risk
19,1,0,0.0,0.658449,0.427408,0.181271,0.29305
45,0,1,0.0,0.575767,0.639355,0.884683,0.29305
139,0,0,0.0,0.966476,0.992024,0.7543,0.0
30,0,1,0.0,0.381088,0.60628,0.04564,0.29305
67,1,0,0.0,0.650631,0.427408,0.707345,0.29305
16,1,0,0.0,0.014582,0.01333,0.565991,0.29305
119,1,1,0.0,0.620565,0.498689,0.950815,0.5861
172,0,1,0.925529,0.54525,0.580791,0.904553,0.564277
109,1,1,0.0,0.604029,0.410062,0.029638,0.5861
140,0,0,0.0,0.539086,0.574879,0.322489,0.0


In [5]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Initialize the Random Forest Regressor
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)  # Start with 100 trees

# Step 2: Train the model on the training data
rf_model.fit(X_train, y_train)

# Step 3: Make predictions on the test data
y_pred = rf_model.predict(X_test)

# Step 4: Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Random Forest Regressor Performance:")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R² Score: {r2}")

# Step 5: Feature Importance (Optional)
# Get feature importance scores
feature_importances = rf_model.feature_importances_
feature_names = X_train.columns

# Print feature importances
print("\nFeature Importances:")
for name, importance in zip(feature_names, feature_importances):
    print(f"{name}: {importance:.4f}")


Random Forest Regressor Performance:
Mean Squared Error (MSE): 0.02063358715915315
R² Score: 0.9691417814413732

Feature Importances:
Natural_Disaster_Risk: 0.7324
Temperature_Risk: 0.0002
Economic_Political_Risk: 0.0006
Area: 0.1207
Production: 0.1385
Temperature: 0.0060
Overall_Risk: 0.0017


#### model improvement 

In [11]:
import requests

# Your NewsAPI key
api_key = '8f0c6c1305ad4992a325957e93a9c669'

# Define the URL to fetch news articles related to natural disasters, floods, droughts, etc.
url = f'https://newsapi.org/v2/everything?q=natural+disaster+OR+flood+OR+drought&apiKey={api_key}'

# Send a request to the API
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    data = response.json()
    articles = data['articles']
    
    # Print article titles and descriptions
    for article in articles:
        print(f"Title: {article['title']}")
        print(f"Description: {article['description']}")
        print(f"URL: {article['url']}")
        print("-" * 50)
else:
    print(f"Failed to retrieve data. Status code: {response.status_code}")


Title: Exceptional natural disaster declared in Mayotte after cyclone
Description: Rescuers race to find missing people and survivors in the French territory devastated by Cyclone Chido.
URL: https://www.bbc.com/news/articles/cr56q8qg312o
--------------------------------------------------
Title: Zillow listings reveal what homebuyers are obsessed with right now
Description: Zillow analyzed millions of listings to figure out what homebuyers want in 2025. In-demand properties are cozy, disaster-resistant, and high-tech.
URL: https://www.businessinsider.com/home-trends-buyers-obsessed-with-in-2025-from-zillow-listings-2024-12
--------------------------------------------------
Title: Use the ‘Anti-AI’ Camera Apps Zerocam and Hallide to Keep Your Photos Looking More Natural
Description: It’s still possible to just take normal-looking photos. Use these apps for Android and iOS to give your phone’s camera a more natural eye.
URL: https://www.wired.com/story/zerocam-hallide-anti-ai-camera-apps

In [None]:
import requests
from transformers import pipeline

# Load the BART summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Function to fetch real-time articles using NewsAPI
def fetch_articles(api_key, query="natural disaster", language="en"):
    url = f"https://newsapi.org/v2/everything?q={query}&language={language}&apiKey={api_key}"
    response = requests.get(url)
    articles = response.json().get('articles', [])
    return articles

# Function to summarize the article using BART
def summarize_article(article_text):
    summary = summarizer(article_text, max_length=150, min_length=50, do_sample=False)
    return summary[0]['summary_text']

# Function to analyze the risk based on keywords
def analyze_risk(article_title, article_description):
    risk_keywords = ['flood', 'drought', 'earthquake', 'storm', 'cyclone', 'wildfire']
    risk_score = 0

    # Check for risk-related keywords in title and description
    for keyword in risk_keywords:
        if keyword.lower() in article_title.lower() or keyword.lower() in article_description.lower():
            risk_score += 1  # Increment risk score for each keyword found
    
    return risk_score

# Function to process and summarize fetched articles
def process_articles(api_key):
    # Fetch real-time articles
    articles = fetch_articles(api_key)
    
    # Process each article
    for article in articles:
        title = article['title']
        description = article['description']
        content = article['content']
        
        # Summarize the article
        summary = summarize_article(content) if content else "No content available for summarization."
        
        # Analyze the risk based on keywords
        risk_score = analyze_risk(title, description)
        
        # Print or store the results
        print(f"Title: {title}")
        print(f"Summary: {summary}")
        print(f"Risk Score: {risk_score}")
        print("-" * 50)

# Example usage
api_key = '8f0c6c1305ad4992a325957e93a9c669'  # Replace with your actual API key
process_articles(api_key)
