<a href="https://colab.research.google.com/github/CristhianCastro96/BITrepository/blob/main/Act_IV_Obtencion_datos_desde_API_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#cargue de librerias
import pandas as pd
import requests
import folium
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Load the original dataset
def load_data(file_path):
    """Read the CSV file at ``file_path`` and return it as a pandas DataFrame."""
    return pd.read_csv(file_path)

# Classify a numeric score into a category label
def classify_air_quality(score):
    """Return the category label for ``score``.

    Bands are checked in ascending order and each upper bound is inclusive:
    ``<= 4`` -> "Bajo", ``<= 7`` -> "Moderado", ``<= 9`` -> "Alto".
    Any value that satisfies none of those comparisons (larger scores, and
    NaN, for which every comparison is false) yields "Muy Alto".
    """
    bands = ((4, "Bajo"), (7, "Moderado"), (9, "Alto"))
    for upper_bound, label in bands:
        if score <= upper_bound:
            return label
    return "Muy Alto"

# Attach latitude/longitude columns looked up by city name
def add_coordinates(df, city_coordinates):
    """Add ``latitude`` and ``longitude`` columns to ``df`` and return it.

    Each value of ``df['City']`` is looked up in ``city_coordinates`` (a
    mapping of city name to a ``(latitude, longitude)`` pair). Cities that
    are not in the mapping get ``None`` for both coordinates. ``df`` is
    modified in place; the same object is returned.
    """
    unknown = (None, None)
    for position, column in enumerate(('latitude', 'longitude')):
        # Bind ``position`` as a default so each lambda keeps its own index.
        df[column] = df['City'].map(
            lambda name, position=position: city_coordinates.get(name, unknown)[position]
        )
    return df

# Fetch air-quality data from the WAQI API
def get_air_quality_data(city, api_key):
    """Fetch the current air-quality reading for ``city`` from the WAQI feed.

    Args:
        city: City or station identifier inserted into the feed URL path.
        api_key: WAQI API token, sent as the ``token`` query parameter.

    Returns:
        A dict with the keys ``city``, ``aqi``, ``dominantpol`` and ``time``
        (a value is ``None`` when the API response omits it), or ``None``
        when the request fails, the body is not valid JSON, or the API does
        not answer with status ``ok``. Failures are reported with ``print``.
    """
    # HTTPS so the token is not sent in clear text.
    url = f'https://api.waqi.info/feed/{city}/'
    try:
        # Without a timeout a stalled connection would block the run forever.
        response = requests.get(url, params={'token': api_key}, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError) as error:
        # Network failure or a non-JSON body: report it like any other
        # API error instead of crashing the caller's loop over cities.
        print(f"Error al obtener datos para {city}: {error}")
        return None

    is_mapping = isinstance(data, dict)
    status = data.get('status') if is_mapping else None
    payload = data.get('data') if is_mapping else data
    # On errors the API puts a message (not a dict) under 'data'.
    if status != 'ok' or not isinstance(payload, dict):
        print(f"Error al obtener datos para {city}: {payload}")
        return None

    time_info = payload.get('time')
    return {
        'city': city,
        'aqi': payload.get('aqi'),
        'dominantpol': payload.get('dominantpol'),
        # 'time' may be missing or not a dict; only read 's' when it is one.
        'time': time_info.get('s') if isinstance(time_info, dict) else None,
    }

# Main entry point
def main():
    """Run the whole workflow: map the dataset, query the API, fit a model.

    Steps, in order:
      1. Load the CSV dataset and label each row with an air-quality
         category derived from ``Health_Risk_Score``.
      2. Attach coordinates for the known cities and draw one circle marker
         per located row on a folium map. The map is shown with the
         notebook's ``display`` when that name exists; otherwise it is
         saved to ``air_quality_map.html``.
      3. Query the WAQI API for a fixed list of cities and print the result
         as a DataFrame. The token is read from the ``WAQI_API_TOKEN``
         environment variable, falling back to the value embedded below.
      4. Fit a random-forest regressor on ``temp``, ``humidity`` and
         ``precip`` to predict ``Health_Risk_Score`` and print the test MSE.
    """
    import os  # local import: only needed here to read the API token

    # Marker colour per air-quality category
    color_map = {
        'Bajo': 'green',
        'Moderado': 'yellow',
        'Alto': 'orange',
        'Muy Alto': 'red',
    }

    # Load the dataset
    df = load_data("Urban Air Quality and Health Impact Dataset.csv")

    # Classify air quality
    df['Air_Quality_Category'] = df['Health_Risk_Score'].apply(classify_air_quality)

    # Attach coordinates
    city_coordinates = {
        'Phoenix': (33.4484, -112.0740),
        'San Diego': (32.7157, -117.1611),
        'New York City': (40.7128, -74.0060),
    }
    df = add_coordinates(df, city_coordinates)

    # Only rows with both coordinates can be centred on or plotted.
    located = df.dropna(subset=['latitude', 'longitude'])
    if located.empty:
        # No usable coordinates: the centre would be NaN, so skip the map.
        print("No hay ciudades con coordenadas; se omite el mapa.")
    else:
        # Build the map centred on the mean coordinate
        map_center = [located['latitude'].mean(), located['longitude'].mean()]
        m = folium.Map(location=map_center, zoom_start=6)

        # Plot the air quality of every located row
        for _, row in located.iterrows():
            marker_color = color_map.get(row['Air_Quality_Category'], 'gray')
            folium.CircleMarker(
                location=[row['latitude'], row['longitude']],
                radius=5,
                color=marker_color,
                fill=True,
                fill_color=marker_color,
                fill_opacity=0.6,
                popup=f"City: {row['City']}<br>Quality: {row['Air_Quality_Category']}"
            ).add_to(m)

        # ``display`` is injected by IPython/Colab and is undefined in a
        # plain Python process; only the name lookup is guarded.
        try:
            show_map = display
        except NameError:
            show_map = None

        if show_map is not None:
            show_map(m)
        else:
            m.save("air_quality_map.html")
            print("Mapa guardado en air_quality_map.html")

    # Fetch air-quality data for several cities
    # NOTE(review): this token is committed in source. Rotate it and supply
    # it only through WAQI_API_TOKEN; the literal is kept as a fallback so
    # existing runs keep working.
    api_key = os.environ.get(
        'WAQI_API_TOKEN', '0c7bf30230b4640dbf120b7bfaf1ee9f0beffc9d'
    )
    cities = ['Phoenix', 'New York City']
    readings = (get_air_quality_data(city, api_key) for city in cities)
    air_quality_data_list = [reading for reading in readings if reading is not None]

    # Build a DataFrame from the API readings
    air_quality_df = pd.DataFrame(air_quality_data_list)
    print(air_quality_df)

    # Model the data
    X = df[['temp', 'humidity', 'precip']]
    y = df['Health_Risk_Score']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Seed the forest as well as the split so the printed MSE is reproducible.
    model = RandomForestRegressor(random_state=42)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    print(f'Error cuadrático medio: {mse}')

# Run the workflow only when this file is executed directly, not when imported
if __name__ == "__main__":
    main()


            city  aqi dominantpol                 time
0        Phoenix   45        None  2024-10-28 17:00:00
1  New York City   19        None  2024-10-28 18:00:00
Error cuadrático medio: 0.0754193570776333
