# Limpieza y transformación de datos meteorológicos

In [None]:
import pandas as pd

# 1. Cargar archivo original

In [None]:
# Location of the raw workbook; change this to your actual path.
file_path = r"E:\Downloads\weather (2).xlsx"

# Labels applied to the eight spreadsheet columns, in order. The first
# column carries nothing this pipeline uses, hence the "Unused" placeholder.
columns = [
    "Unused",
    "Temperature (°F)",
    "Condition",
    "Wind Speed (mph)",
    "Direction",
    "Humidity (%)",
    "Pressure (Hg)",
    "Visibility (mi)",
]

# header=1 makes pandas treat the second spreadsheet row as the header row
# and skip everything above it; names= then replaces those header labels
# with the list above. Data therefore starts at the third spreadsheet row.
# NOTE(review): the original comment only mentioned skipping the first
# (irregular) row; confirm the second row is really a header and not data.
df = pd.read_excel(io=file_path, names=columns, header=1)

# 2. Limpieza inicial

In [None]:
# Discard the placeholder column first, then remove rows in which every
# remaining cell is missing (how="all" keeps partially filled rows).
df = df.drop(columns=["Unused"]).dropna(how="all")

# 3. Conversión de unidades

In [None]:
# Conversion factors, kept at the precision this pipeline has always used.
MILES_TO_KM = 1.60934   # miles -> kilometres (and therefore mph -> km/h)
INHG_TO_HPA = 33.8639   # inches of mercury -> hectopascals


def _parse_measurement(series, unit, aliases=None):
    """Turn a text column such as ``"72 °F"`` into a float Series.

    Args:
        series: Column of strings holding a number followed by a unit.
        unit: Literal unit text to remove (matched verbatim, not as a regex).
        aliases: Optional mapping of whole-cell values to the numeric text
            that should stand in for them (e.g. ``{"No wind": "0"}``).
            Applied after the unit has been removed and whitespace trimmed.

    Returns:
        A new Series of dtype float; missing cells stay NaN.

    Raises:
        AttributeError: If ``series`` does not hold strings (the ``.str``
            accessor is unavailable on numeric columns).
        ValueError: If a cell still is not numeric text after cleaning.
    """
    text = series.str.replace(unit, "", regex=False).str.strip()
    if aliases:
        # Whole-value replacement (Series.replace), not substring replacement.
        text = text.replace(aliases)
    return text.astype(float)


# Temperature: parse °F, derive °C.
df["Temperature (°F)"] = _parse_measurement(df["Temperature (°F)"], "°F")
df["Temperature (°C)"] = (df["Temperature (°F)"] - 32) * 5 / 9

# Wind speed: calm readings are reported as the text "No wind" -> 0 mph.
df["Wind Speed (mph)"] = _parse_measurement(
    df["Wind Speed (mph)"], "mph", aliases={"No wind": "0"}
)
df["Wind Speed (km/h)"] = df["Wind Speed (mph)"] * MILES_TO_KM

# Pressure: the source marks inches of mercury with the suffix '"Hg'.
df["Pressure (Hg)"] = _parse_measurement(df["Pressure (Hg)"], '"Hg')
df["Pressure (hPa)"] = df["Pressure (Hg)"] * INHG_TO_HPA

# Visibility: miles -> kilometres.
df["Visibility (mi)"] = _parse_measurement(df["Visibility (mi)"], "mi")
df["Visibility (km)"] = df["Visibility (mi)"] * MILES_TO_KM

# Humidity: parse the percentage, then store it as a fraction (percent / 100).
df["Humidity (%)"] = _parse_measurement(df["Humidity (%)"], "%")
df["Humidity"] = df["Humidity (%)"] / 100

# 4. Limpieza de texto en columnas categóricas

In [None]:
# Remove any trailing periods from the condition labels so that values such
# as "Sunny." and "Sunny" end up as the same category.
df["Condition"] = df["Condition"].str.rstrip(to_strip=".")

# 5. Eliminar las columnas originales ya convertidas (conservar solo las unidades métricas)

In [None]:
# The source columns have all been converted above, so only the derived
# columns (°C, km/h, hPa, km, humidity fraction) are kept from here on.
df = df.drop(
    labels=[
        "Temperature (°F)",
        "Wind Speed (mph)",
        "Pressure (Hg)",
        "Visibility (mi)",
        "Humidity (%)",
    ],
    axis="columns",
)

# 6. Guardar archivo limpio

In [None]:
# Destination workbook for the cleaned data.
output_path = r"E:\Downloads\cleaned_weather_data.xlsx"

# index=False keeps the DataFrame's row index out of the spreadsheet.
df.to_excel(excel_writer=output_path, index=False)

# Confirmation message for the notebook user.
print("Limpieza completada. Archivo guardado en:", output_path)