# Maji Ndogo

In [1]:
import os
import re

In [2]:
from field_data_processor import FieldDataProcessor
from weather_data_processor import WeatherDataProcessor
import logging


# Configure root logging once so messages emitted during processing are shown
# with a timestamp and severity level.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Single configuration dictionary handed to both WeatherDataProcessor and
# FieldDataProcessor below. How each processor consumes these keys is defined
# in their own modules (not visible here).
config_params = {
    # Joins the four survey tables on their shared Field_ID column.
    'sql_query':"""
        SELECT * 
        FROM geographic_features
        LEFT JOIN weather_features USING(Field_ID)
        LEFT JOIN soil_and_crop_features USING(Field_ID)
        LEFT JOIN farm_management_features USING(Field_ID)
    """,
    'db_path': 'Maji_Ndogo_farm_survey_small.db',
    # Annual_yield and Crop_type are swapped with each other; Ave_temps becomes
    # Temperature. NOTE(review): the swap presumably corrects mislabelled
    # columns in the source database -- confirm against the raw data.
    'columns_to_rename': {
        'Annual_yield': 'Crop_type',
        'Crop_type': 'Annual_yield',
        'Ave_temps': 'Temperature'
    },
    # Maps crop names with trailing spaces or stray characters to clean names.
    'values_to_rename': {
        'cassava ': 'cassava',
        'wheat ': 'wheat',
        'tea ': 'tea',
        'cassaval': 'cassava',
        'wheatn': 'wheat',
        'teaa': 'tea'
    },
    'weather_map_url': "https://raw.githubusercontent.com/Explore-AI/Public-Data/master/Maji_Ndogo/Weather_data_field_mapping.csv",
    'weather_url': "https://raw.githubusercontent.com/Explore-AI/Public-Data/master/Maji_Ndogo/Weather_station_data.csv",

    # One regex per measurement type; the first capture group of each
    # alternative holds the numeric value. Keys are spelled like the
    # corresponding field-data columns (Rainfall, Temperature,
    # Pollution_level). The temperature key was previously misspelled
    # 'Temprature', which matched no column name.
    'regex_patterns': {
        'Rainfall': r'(\d+(\.\d+)?)\s?mm',
        'Temperature': r'(\d+(\.\d+)?)\s?C',
        'Pollution_level': r'=\s*(-?\d+(\.\d+)?)|Pollution at \s*(-?\d+(\.\d+)?)',
    }
    
}

# --- Weather station data -------------------------------------------------
# Build the weather processor from the shared config, run it, and keep a
# handle on the resulting DataFrame.
weather_processor = WeatherDataProcessor(config_params)
weather_processor.process_data()
weather_df = weather_processor.weather_df

# --- Field survey data ----------------------------------------------------
# Same sequence for the field processor: construct, run, grab the frame.
field_processor = FieldDataProcessor(config_params)
field_processor.process_data()
field_df = field_processor.df

# Show the column labels of each result, field data first, as a quick check
# of what the two processors produced.
for processed_frame in (field_df, weather_df):
    print(processed_frame.columns)

  


2025-06-09 13:27:00,531 - INFO - CSV file read successfully from https://raw.githubusercontent.com/Explore-AI/Public-Data/master/Maji_Ndogo/Weather_station_data.csv, returned 1843 rows.
2025-06-09 13:27:00,533 - weather_data_processor.WeatherDataProcessor - INFO - Weather data loaded successfully.
2025-06-09 13:27:00,603 - weather_data_processor.WeatherDataProcessor - INFO - Processed weather messages to extract measurements.
2025-06-09 13:27:00,608 - weather_data_processor.WeatherDataProcessor - INFO - Calculated average values for each measurement.
2025-06-09 13:27:00,611 - weather_data_processor.WeatherDataProcessor - INFO - Weather data processing completed successfully.
2025-06-09 13:27:00,613 - field_data_processor.FieldDataProcessor - INFO - Starting data processing...
2025-06-09 13:27:00,616 - INFO - Database engine created successfully for Maji_Ndogo_farm_survey_small.db
2025-06-09 13:27:00,693 - INFO - Query executed successfully, returned 5654 rows.
2025-06-09 13:27:00,694 -

Index(['Field_ID', 'Elevation', 'Latitude', 'Longitude', 'Location', 'Slope',
       'Rainfall', 'Min_temperature_C', 'Max_temperature_C', 'Temperature',
       'Soil_fertility', 'Soil_type', 'pH', 'Pollution_level', 'Plot_size',
       'Annual_yield', 'Crop_type', 'Standard_yield', 'Weather_station'],
      dtype='object')
Index(['Weather_station_ID', 'Message', 'Measurement', 'Value'], dtype='object')
