In [22]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Function to generate random weather data
def generate_weather_data(num_days):
    # Generate a range of dates
    start_date = datetime(1974, 1, 1)
    dates = [start_date + timedelta(days=i) for i in range(num_days)]

    # Generate random weather parameters
    temperatures = np.random.uniform(low=15, high=40, size=num_days)  # Temperature in °C
    humidities = np.random.uniform(low=30, high=100, size=num_days)     # Humidity in %
    wind_speeds = np.random.uniform(low=5, high=20, size=num_days)      # Wind speed in km/h
    pressures = np.random.uniform(low=980, high=1050, size=num_days)     # Atmospheric pressure in hPa
    cloud_covers = np.random.uniform(low=0, high=100, size=num_days)     # Cloud cover in %
    precipitations = np.random.uniform(low=0, high=50, size=num_days)     # Precipitation in mm

    # Create a DataFrame
    weather_data = pd.DataFrame({
        'Date': dates,
        'Temperature (°C)': temperatures,
        'Humidity (%)': humidities,
        'Wind Speed (km/h)': wind_speeds,
        'Atmospheric Pressure (hPa)': pressures,
        'Cloud Cover (%)': cloud_covers,
        'Precipitation (mm)': precipitations
    })

    return weather_data

# Generate a dataset for 365 days
weather_dataset = generate_weather_data(18250)

# Display the first few rows of the dataset
print(weather_dataset.head())

# Optionally save to CSV
weather_dataset.to_csv('weather_data.csv', index=False)


        Date  Temperature (°C)  Humidity (%)  Wind Speed (km/h)  \
0 1974-01-01         23.791552     74.874343          11.218026   
1 1974-01-02         27.389090     68.593140           6.962119   
2 1974-01-03         26.278130     73.353099          14.884260   
3 1974-01-04         32.636019     82.560034          19.659407   
4 1974-01-05         32.086648     57.454510          11.266764   

   Atmospheric Pressure (hPa)  Cloud Cover (%)  Precipitation (mm)  
0                 1014.933465        52.214489           36.332988  
1                 1003.538086        63.906003           14.667195  
2                  981.204394        52.665559           32.181863  
3                 1030.397735        99.850092           33.329446  
4                 1011.801489        60.154664           23.442226  


In [23]:
df = pd.read_csv("weather_data.csv")
df


Unnamed: 0,Date,Temperature (°C),Humidity (%),Wind Speed (km/h),Atmospheric Pressure (hPa),Cloud Cover (%),Precipitation (mm)
0,1974-01-01,23.791552,74.874343,11.218026,1014.933465,52.214489,36.332988
1,1974-01-02,27.389090,68.593140,6.962119,1003.538086,63.906003,14.667195
2,1974-01-03,26.278130,73.353099,14.884260,981.204394,52.665559,32.181863
3,1974-01-04,32.636019,82.560034,19.659407,1030.397735,99.850092,33.329446
4,1974-01-05,32.086648,57.454510,11.266764,1011.801489,60.154664,23.442226
...,...,...,...,...,...,...,...
18245,2023-12-15,38.241138,30.857943,9.740871,1038.902470,44.006435,16.640101
18246,2023-12-16,23.594431,30.178786,15.751069,1004.203954,88.494016,25.436394
18247,2023-12-17,31.983164,72.254068,18.854995,1047.392222,80.601851,26.733669
18248,2023-12-18,21.421267,30.289725,13.172705,980.728994,53.053592,16.903225


In [24]:
import pandas as pd

# Load the dataset
file_path = "weather_data.csv"
df = pd.read_csv(file_path)

# Display basic information and first few rows
df.info(), df.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18250 entries, 0 to 18249
Data columns (total 7 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Date                        18250 non-null  object 
 1   Temperature (°C)            18250 non-null  float64
 2   Humidity (%)                18250 non-null  float64
 3   Wind Speed (km/h)           18250 non-null  float64
 4   Atmospheric Pressure (hPa)  18250 non-null  float64
 5   Cloud Cover (%)             18250 non-null  float64
 6   Precipitation (mm)          18250 non-null  float64
dtypes: float64(6), object(1)
memory usage: 998.2+ KB


(None,
          Date  Temperature (°C)  Humidity (%)  Wind Speed (km/h)  \
 0  1974-01-01         23.791552     74.874343          11.218026   
 1  1974-01-02         27.389090     68.593140           6.962119   
 2  1974-01-03         26.278130     73.353099          14.884260   
 3  1974-01-04         32.636019     82.560034          19.659407   
 4  1974-01-05         32.086648     57.454510          11.266764   
 
    Atmospheric Pressure (hPa)  Cloud Cover (%)  Precipitation (mm)  
 0                 1014.933465        52.214489           36.332988  
 1                 1003.538086        63.906003           14.667195  
 2                  981.204394        52.665559           32.181863  
 3                 1030.397735        99.850092           33.329446  
 4                 1011.801489        60.154664           23.442226  )

In [25]:
# Convert 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Check for missing values
missing_values = df.isnull().sum()

# Display summary
df.describe(), missing_values


(                      Date  Temperature (°C)  Humidity (%)  Wind Speed (km/h)  \
 count                18250      18250.000000  18250.000000       18250.000000   
 mean   1998-12-25 12:00:00         27.462182     65.025915          12.486689   
 min    1974-01-01 00:00:00         15.001394     30.002105           5.000047   
 25%    1986-06-29 06:00:00         21.221495     47.691533           8.729447   
 50%    1998-12-25 12:00:00         27.472918     64.930534          12.483700   
 75%    2011-06-22 18:00:00         33.754821     82.484841          16.228028   
 max    2023-12-19 00:00:00         39.998803     99.993289          19.998788   
 std                    NaN          7.226500     20.128968           4.331189   
 
        Atmospheric Pressure (hPa)  Cloud Cover (%)  Precipitation (mm)  
 count                18250.000000     18250.000000        18250.000000  
 mean                  1014.979258        50.075230           25.095443  
 min                    980.001889    

In [26]:
# Feature Engineering: Create Lag Features for Past 3 Days
for lag in range(1, 4):
    df[f'Temp_Lag_{lag}'] = df['Temperature (°C)'].shift(lag)
    df[f'Humidity_Lag_{lag}'] = df['Humidity (%)'].shift(lag)
    df[f'Wind_Lag_{lag}'] = df['Wind Speed (km/h)'].shift(lag)
    df[f'Pressure_Lag_{lag}'] = df['Atmospheric Pressure (hPa)'].shift(lag)
    df[f'Cloud_Lag_{lag}'] = df['Cloud Cover (%)'].shift(lag)
    df[f'Precip_Lag_{lag}'] = df['Precipitation (mm)'].shift(lag)

# Moving Averages (Trend Features)
df['Temp_MA_3'] = df['Temperature (°C)'].rolling(window=3).mean()
df['Humidity_MA_3'] = df['Humidity (%)'].rolling(window=3).mean()
df['Precip_MA_3'] = df['Precipitation (mm)'].rolling(window=3).mean()

# Extract Date Features
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month
df['Day'] = df['Date'].dt.day

# Drop initial rows with NaN due to shifting
df.dropna(inplace=True)

# Display final dataset with new features
df.head()


Unnamed: 0,Date,Temperature (°C),Humidity (%),Wind Speed (km/h),Atmospheric Pressure (hPa),Cloud Cover (%),Precipitation (mm),Temp_Lag_1,Humidity_Lag_1,Wind_Lag_1,...,Wind_Lag_3,Pressure_Lag_3,Cloud_Lag_3,Precip_Lag_3,Temp_MA_3,Humidity_MA_3,Precip_MA_3,Year,Month,Day
3,1974-01-04,32.636019,82.560034,19.659407,1030.397735,99.850092,33.329446,26.27813,73.353099,14.88426,...,11.218026,1014.933465,52.214489,36.332988,28.767747,74.835424,26.726168,1974,1,4
4,1974-01-05,32.086648,57.45451,11.266764,1011.801489,60.154664,23.442226,32.636019,82.560034,19.659407,...,6.962119,1003.538086,63.906003,14.667195,30.333599,71.122548,29.651178,1974,1,5
5,1974-01-06,33.548573,78.588346,15.055392,1008.592208,16.275844,23.548246,32.086648,57.45451,11.266764,...,14.88426,981.204394,52.665559,32.181863,32.75708,72.86763,26.773306,1974,1,6
6,1974-01-07,23.116516,49.100931,14.972878,1021.949081,2.99051,1.922684,33.548573,78.588346,15.055392,...,19.659407,1030.397735,99.850092,33.329446,29.583913,61.714596,16.304385,1974,1,7
7,1974-01-08,18.685588,36.481218,14.546058,1001.38916,14.851112,6.174627,23.116516,49.100931,14.972878,...,11.266764,1011.801489,60.154664,23.442226,25.116892,54.723499,10.548519,1974,1,8


In [29]:
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error

# Select target variable (Temperature Prediction for next day)
target = 'Temperature (°C)'
features = [col for col in df.columns if col not in ['Date', target]]

# Split data into train and test sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(df[features], df[target], test_size=0.2, random_state=42, shuffle=False)

# Train the XGBoost Regressor
model = XGBRegressor(n_estimators=500, learning_rate=0.05, max_depth=6, random_state=42)
model.fit(X_train, y_train)

# Predict and evaluate
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae}")

def farming_advice(temp, humidity, wind, precip):
    advice = []

    # Irrigation Management
    if temp > 30 and precip < 5:
        advice.append("Consider irrigating the crops due to high temperature and low rainfall.")

    # Fertilizer Application
    if precip > 20:
        advice.append("Avoid fertilizer application due to heavy rain prediction.")
    else:
        advice.append("Fertilizer application is safe.")

    # Disease Prevention
    if humidity > 80:
        advice.append("High humidity detected! Monitor for fungal infections and consider fungicide application.")

    # Harvesting Decision
    if wind > 15 or precip > 10:
        advice.append("Avoid harvesting due to strong winds or rain.")
    else:
        advice.append("Weather is suitable for harvesting.")

    return advice

# Get latest test data row (for the most recent prediction)
latest_test_features = X_test.iloc[-1]  # Take the last row of test features
latest_pred_temp = y_pred[-1]  # Corresponding predicted temperature

# Extract relevant values
humidity = latest_test_features.get('Humidity', 50)  # Default value in case column is missing
wind = latest_test_features.get('Wind Speed', 5)  # Default value
precip = latest_test_features.get('Precipitation', 0)  # Default value

# Generate and print farming advice
advice = farming_advice(latest_pred_temp, humidity, wind, precip)
print("\nFarming Advice:")
for a in advice:
    print("-", a)


Mean Absolute Error: 0.40792994517121706

Farming Advice:
- Fertilizer application is safe.
- Weather is suitable for harvesting.
