In [25]:
import pandas as pd
import time
import logging
import functools
import cProfile
import csv
from collections import defaultdict
import asyncio
import aiohttp

#logging setup
logging.basicConfig(
    filename = "kenya_data_processing.log",
    format = "%(asctime)s - %(levelname)s - %(message)s",
    level = logging.INFO,
    filemode = "w"
)
#custom exception
class InvalidDataError(Exception):
    pass
    
#generator to read csv dataset
def read_csv(filepath):
    with open(filepath, newline="") as csvfile:
        reader = csv.ReaderDict(csvfile)
        for row in reader:
            yield row
        
#mock API fetching mock weather data
async def fetch_weather_data(county):
    async with aiohttp.ClientSession() as session:
        await asyncio.sleep(1)
        logging.info(f"Completed fetching {county} county data!")
        return {"county": county, "temperature": 25}

#caching for efficiency
def memoize(func):
    cach = {}
    @functools.wraps(func)
    def wrapper(*args):
        if args in cach:
            return cach[args]
        results = func(*args) 
        cach[args] = results
        return results
    return wrapper
    
#total populations per county
@memoize
def aggregate_population(data):
    return sum(data)

def timing(func):
    @functools.wraps(func)
    def wrapper(*args, **kwargs):  
        start_time = time.time()
        results =await  func(*args, **kwargs)
        end_time = time.time()
        logging.info(f"{func.__name__} took {end_time - start_time:.2f} seconds")
        return results
    return wrapper   
    
@timing
async def process_data(filepath):
    try:
        #initializing data structures
        county_populations = directDict(list)
        valid_counties = ["Mombasa", "Nairobi", "Nakuru"]
        
        for row in read_csv(filepath):
            county = row["county"]
            population = int(row["population"])
            area = int(row["area_in_sqm"])
               
            if county not in valid_counties:
                logging.error(f"Invalid county entered: {county} county ")
                raise InvalidDataError(f"Invalid county: {county}")
            if population < 0:
                logging.error(f"Invalid population data: {population}")
                raise InvalidDataError(f"Invalid population: {population}")
    
            county_populations[county].append(population) 
            logging.info(f"Completed row for {county} county")   
        
        weather_tasks = [fetch_weather_data(county) for county in county_populations.keys()]
        weather_results = await asyncio.gather(*weather_tasks)

        total_populations = { county: aggregate_population(population) for county, population in county_populations.items()}
        final_results = {"population": total_populations, "weather": { item["county"]: item["temperature"] for item in weather_results }}
        return final_results
        
    except InvalidDataError as e:
        logging.error(f"Validation error: {e}")
        raise 
    except Exception as e :
        logging.error(f"Unexpected error: {e}")
        raise 
#main function
async def main():
    filepath = "kenyan_county_pop.csv"
    results = await process_data(filepath)
    print("Output: ", results)
#profiling our main function    
if __name__ == "__main__" :
    cProfile.run((main())

            
    
    

SyntaxError: incomplete input (750337838.py, line 109)

In [4]:
import pandas as pd
df = pd.read_csv("./BackendFiles/kenya_county_pop.csv")
df                                              