In [18]:
import pandas as pd
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go


In [2]:
# Load the weather CSV (path is relative to the notebook's working directory)
# into a DataFrame and preview its first five rows.
file_path = 'GlobalWeatherRepository.csv'
weather_data = pd.read_csv(filepath_or_buffer=file_path)
weather_data.head(n=5)

Unnamed: 0,country,location_name,latitude,longitude,timezone,last_updated_epoch,last_updated,temperature_celsius,temperature_fahrenheit,condition_text,...,air_quality_PM2.5,air_quality_PM10,air_quality_us-epa-index,air_quality_gb-defra-index,sunrise,sunset,moonrise,moonset,moon_phase,moon_illumination
0,Afghanistan,Kabul,34.52,69.18,Asia/Kabul,1693301400,2023-08-29 14:00,28.8,83.8,Sunny,...,7.9,11.1,1,1,05:24 AM,06:24 PM,05:39 PM,02:48 AM,Waxing Gibbous,93
1,Albania,Tirana,41.33,19.82,Europe/Tirane,1693301400,2023-08-29 11:30,27.0,80.6,Partly cloudy,...,28.2,29.6,2,3,06:04 AM,07:19 PM,06:50 PM,03:25 AM,Waxing Gibbous,93
2,Algeria,Algiers,36.76,3.05,Africa/Algiers,1693301400,2023-08-29 10:30,28.0,82.4,Partly cloudy,...,6.4,7.9,1,1,06:16 AM,07:21 PM,06:46 PM,03:50 AM,Waxing Gibbous,93
3,Andorra,Andorra La Vella,42.5,1.52,Europe/Andorra,1693301400,2023-08-29 11:30,10.2,50.4,Sunny,...,0.5,0.8,1,1,07:16 AM,08:34 PM,08:08 PM,04:38 AM,Waxing Gibbous,93
4,Angola,Luanda,-8.84,13.23,Africa/Luanda,1693301400,2023-08-29 10:30,25.0,77.0,Partly cloudy,...,139.6,203.3,4,10,06:11 AM,06:06 PM,04:43 PM,04:41 AM,Waxing Gibbous,93


In [3]:
# Print a quick structural overview of the dataset: its shape, the dtype of
# every column, and descriptive statistics from DataFrame.describe().
summary_sections = (
    ("Dimensiones del Dataset:", weather_data.shape),
    ("\nTipos de Datos:\n", weather_data.dtypes),
    ("\nEstadísticas Descriptivas:\n", weather_data.describe()),
)
for section_heading, section_content in summary_sections:
    print(section_heading, section_content)



Dimensiones del Dataset: (26704, 41)

Tipos de Datos:
 country                          object
location_name                    object
latitude                        float64
longitude                       float64
timezone                         object
last_updated_epoch                int64
last_updated                     object
temperature_celsius             float64
temperature_fahrenheit          float64
condition_text                   object
wind_mph                        float64
wind_kph                        float64
wind_degree                       int64
wind_direction                   object
pressure_mb                     float64
pressure_in                     float64
precip_mm                       float64
precip_in                       float64
humidity                          int64
cloud                             int64
feels_like_celsius              float64
feels_like_fahrenheit           float64
visibility_km                   float64
visibility_miles         

In [4]:
# Report how many missing values each column has before any type conversion.
print("Valores Faltantes por Columna:\n", weather_data.isnull().sum())

# Parse the 'last_updated' strings into datetimes. errors='coerce' replaces
# any value that cannot be parsed with NaT instead of raising, and those NaT
# values would not show up in the report printed above. Count the values that
# were present before parsing but are NaT afterwards and report them, so a
# parsing problem is visible instead of silently propagating downstream.
raw_last_updated = weather_data['last_updated']
parsed_last_updated = pd.to_datetime(raw_last_updated, errors='coerce')
unparsed_count = int((parsed_last_updated.isna() & raw_last_updated.notna()).sum())
if unparsed_count:
    print(f"\nAdvertencia: {unparsed_count} valores de 'last_updated' no se pudieron convertir a fecha")

weather_data['last_updated'] = parsed_last_updated

Valores Faltantes por Columna:
 country                         0
location_name                   0
latitude                        0
longitude                       0
timezone                        0
last_updated_epoch              0
last_updated                    0
temperature_celsius             0
temperature_fahrenheit          0
condition_text                  0
wind_mph                        0
wind_kph                        0
wind_degree                     0
wind_direction                  0
pressure_mb                     0
pressure_in                     0
precip_mm                       0
precip_in                       0
humidity                        0
cloud                           0
feels_like_celsius              0
feels_like_fahrenheit           0
visibility_km                   0
visibility_miles                0
uv_index                        0
gust_mph                        0
gust_kph                        0
air_quality_Carbon_Monoxide     0
air_quality_Ozon

In [5]:
# Add time-related features derived from the 'last_updated' timestamps.
# The .dt accessor requires the column to already hold datetimes.
update_timestamps = weather_data['last_updated'].dt
weather_data['month'] = update_timestamps.month
# pandas numbers weekdays from Monday=0 to Sunday=6.
weather_data['day_of_week'] = update_timestamps.dayofweek

print(weather_data.head())

       country     location_name  latitude  longitude        timezone  \
0  Afghanistan             Kabul     34.52      69.18      Asia/Kabul   
1      Albania            Tirana     41.33      19.82   Europe/Tirane   
2      Algeria           Algiers     36.76       3.05  Africa/Algiers   
3      Andorra  Andorra La Vella     42.50       1.52  Europe/Andorra   
4       Angola            Luanda     -8.84      13.23   Africa/Luanda   

   last_updated_epoch        last_updated  temperature_celsius  \
0          1693301400 2023-08-29 14:00:00                 28.8   
1          1693301400 2023-08-29 11:30:00                 27.0   
2          1693301400 2023-08-29 10:30:00                 28.0   
3          1693301400 2023-08-29 11:30:00                 10.2   
4          1693301400 2023-08-29 10:30:00                 25.0   

   temperature_fahrenheit condition_text  ...  air_quality_us-epa-index  \
0                    83.8          Sunny  ...                         1   
1             

In [6]:
# Gauge indicator showing the temperature (Celsius) of the first row of the
# dataset. The axis tops out at 40, and two shaded bands mark 0-10 and 10-20.
first_row_temperature = weather_data['temperature_celsius'].iloc[0]

gauge_steps = [
    dict(range=[0, 10], color="lightgray"),
    dict(range=[10, 20], color="gray"),
]

temperature_indicator = go.Indicator(
    mode="gauge+number",
    value=first_row_temperature,
    title=dict(text='Temperature (Celsius)'),
    domain=dict(x=[0, 1], y=[0, 1]),
    gauge=dict(
        axis=dict(range=[None, 40]),
        bar=dict(color="darkblue"),
        steps=gauge_steps,
    ),
)

gauge_chart = go.Figure(temperature_indicator)
gauge_chart.show()

In [7]:
# Scatter plot of humidity (y) against temperature (x); each point is also
# coloured by its humidity value using the sequential RdBu colour scale.
scatter_plot = px.scatter(
    data_frame=weather_data,
    x='temperature_celsius',
    y='humidity',
    color='humidity',
    color_continuous_scale=px.colors.sequential.RdBu,
    title='Temperature vs Humidity',
)
scatter_plot.show()


In [8]:
# Bar chart of how many rows report each wind direction.
# value_counts() returns the directions ordered from most to least frequent.
wind_direction_counts = weather_data['wind_direction'].value_counts()

bar_chart = px.bar(
    x=wind_direction_counts.index,
    y=wind_direction_counts.values,
    title='Frequency of Wind Directions',
    labels=dict(x='Wind Direction', y='Frequency'),
)
bar_chart.show()


In [9]:
# Box plot comparing temperature distributions across regions.
# The region is taken as the text before the first '/' in the timezone
# string (e.g. 'Asia/Kabul' -> 'Asia') and stored in a new 'region' column.
timezone_parts = weather_data['timezone'].str.split('/')
weather_data['region'] = timezone_parts.str.get(0)

box_plot_temperature_by_region = px.box(
    data_frame=weather_data,
    x='region',
    y='temperature_celsius',
    color='region',
    title='Temperature Comparison by Region',
)
box_plot_temperature_by_region.show()





In [10]:
# World map with one marker per row, positioned by latitude/longitude and
# coloured by temperature; hovering a marker shows the location name.
mapbox_plot = px.scatter_mapbox(
    data_frame=weather_data,
    lat='latitude',
    lon='longitude',
    color='temperature_celsius',
    hover_name='location_name',
    zoom=1,
    title='Temperature Map',
)
# Use OpenStreetMap tiles and centre the initial view on lat 20, lon 0.
mapbox_plot.update_layout(
    mapbox_style='open-street-map',
    mapbox_center=dict(lat=20, lon=0),
)
mapbox_plot.show()

In [11]:
# Line chart of temperature trends over time.
#
# The dataset holds one row per location per update, so drawing the raw rows
# as a single line connects readings from different cities in file order and
# produces a zigzag that shows no trend. Aggregate to one value per calendar
# day (the mean over all locations reporting that day) and plot that instead.
# NOTE(review): 'last_updated' looks like each location's local time, so a
# "day" here is the local calendar date of each reading - confirm if a
# UTC-based day is preferred.
observed_at = pd.to_datetime(weather_data['last_updated'], errors='coerce')

daily_mean_temperature = (
    pd.DataFrame({
        # Truncate each timestamp to midnight so rows group by calendar day.
        'last_updated': observed_at.dt.floor('D'),
        'temperature_celsius': weather_data['temperature_celsius'],
    })
    # Rows whose timestamp could not be parsed cannot be placed on the axis.
    .dropna(subset=['last_updated'])
    # groupby sorts its keys, so the result is in chronological order.
    .groupby('last_updated', as_index=False)['temperature_celsius']
    .mean()
)

line_plot = px.line(
    daily_mean_temperature,
    x='last_updated',
    y='temperature_celsius',
    title='Temperature Trends Over Time',
    labels={'last_updated': 'Date', 'temperature_celsius': 'Mean temperature (°C)'},
)
line_plot.show()

In [21]:
# NOTE(review): make_subplots is not referenced in the visible cells; the
# import is kept in case a later cell relies on it.
from plotly.subplots import make_subplots

# Distinct location names, in order of first appearance in the dataset.
cities = weather_data['location_name'].unique()

# Highest and lowest recorded temperature for every location, computed in a
# single grouped pass instead of re-filtering the whole frame once per city.
# sort=False keeps the groups in first-appearance order, matching `cities`.
city_temperature_extremes = (
    weather_data
    .groupby('location_name', sort=False)['temperature_celsius']
    .agg(['max', 'min'])
)

# City -> temperature lookups.
max_temperatures = city_temperature_extremes['max'].to_dict()
min_temperatures = city_temperature_extremes['min'].to_dict()

# Two-column frames ('City' plus the extreme value) used by the chart cells.
max_temperatures_df = (
    city_temperature_extremes['max']
    .rename('Max Temperature')
    .rename_axis('City')
    .reset_index()
)
min_temperatures_df = (
    city_temperature_extremes['min']
    .rename('Min Temperature')
    .rename_axis('City')
    .reset_index()
)



In [22]:
# Bar charts of the ten hottest cities (by maximum recorded temperature) and
# the ten coldest cities (by minimum recorded temperature).
hottest_cities = max_temperatures_df.nlargest(10, 'Max Temperature')
coldest_cities = min_temperatures_df.nsmallest(10, 'Min Temperature')

hot_cities_chart = px.bar(hottest_cities, x='City', y='Max Temperature',
                        title='Top 10 Hottest Cities', color='Max Temperature',
                        color_continuous_scale='reds', text='Max Temperature')
# Print each bar's value inside the bar, formatted with two decimals.
hot_cities_chart.update_traces(texttemplate='%{text:.2f}', textposition='inside')
# y-axis spans the full spread of per-city maxima, padded by 5 on each side.
hot_cities_chart.update_layout(yaxis=dict(range=[max_temperatures_df['Max Temperature'].min()-5, max_temperatures_df['Max Temperature'].max()+5]))

cold_cities_chart = px.bar(coldest_cities, x='City', y='Min Temperature',
                        title='Top 10 Coldest Cities', color='Min Temperature',
                        color_continuous_scale='blues', text='Min Temperature')
cold_cities_chart.update_traces(texttemplate='%{text:.2f}', textposition='inside')
# Lower limit: 5 below the coldest minimum. Upper limit: at least 5 so the
# zero line stays visible, raised when one of the plotted minima is itself
# above 0 so that no bar is clipped by a fixed ceiling.
cold_y_upper = max(5, coldest_cities['Min Temperature'].max() + 5)
cold_cities_chart.update_layout(yaxis=dict(range=[min_temperatures_df['Min Temperature'].min()-5, cold_y_upper]))

hot_cities_chart.show()
cold_cities_chart.show()

In [26]:
# Mean of the 'air_quality_us-epa-index' column for each location, with the
# columns renamed to the labels shown on the charts.
avg_pollution = (
    weather_data
    .groupby('location_name')['air_quality_us-epa-index']
    .mean()
    .reset_index()
    .rename(columns={'location_name': 'City', 'air_quality_us-epa-index': 'Average Pollution'})
)

# Ten locations with the highest and the lowest average index.
most_polluted_cities = avg_pollution.nlargest(10, 'Average Pollution')
least_polluted_cities = avg_pollution.nsmallest(10, 'Average Pollution')

# Bar chart of the most polluted cities, values printed inside the bars.
polluted_cities_chart = px.bar(
    most_polluted_cities,
    x='City',
    y='Average Pollution',
    color='Average Pollution',
    color_continuous_scale='purples',
    text='Average Pollution',
    title='Top 10 Most Polluted Cities',
)
polluted_cities_chart.update_traces(textposition='inside', texttemplate='%{text:.2f}')

# Bar chart of the least polluted cities, same formatting.
clean_cities_chart = px.bar(
    least_polluted_cities,
    x='City',
    y='Average Pollution',
    color='Average Pollution',
    color_continuous_scale='Algae',
    text='Average Pollution',
    title='Top 10 Least Polluted Cities',
)
clean_cities_chart.update_traces(textposition='inside', texttemplate='%{text:.2f}')

polluted_cities_chart.show()
clean_cities_chart.show()