In [1]:
import json
import os
from datetime import datetime
from urllib.parse import quote

import pandas as pd
import requests
from pyspark.sql import SparkSession
from pyspark.sql.types import (
    ArrayType,
    DoubleType,
    FloatType,
    IntegerType,
    LongType,
    StringType,
    StructField,
    StructType,
)

# Do not truncate columns when pandas displays a DataFrame.
pd.set_option('display.max_columns', None)


In [2]:
# Build the Spark session for this notebook (an already active session is reused).
spark = (
    SparkSession.builder
    .appName('GuardarDatosClimaticos')
    .getOrCreate()
)

In [6]:
# Base endpoint of the weather service; location and date are appended to it as path segments.
url = 'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/'

# Request settings: the place to query and today's date as YYYY-MM-DD.
params = dict(
    locacion='Sicilia',
    fecha=datetime.now().strftime('%Y-%m-%d'),
)

def obtener_api_key(file_path):
    """Return the API key stored in a text file.

    Args:
        file_path: Path of the file that holds the key.

    Returns:
        The whole file content with leading and trailing whitespace removed.
    """
    with open(file_path, mode='r') as archivo_clave:
        contenido = archivo_clave.read()
    return contenido.strip()
    
    
def extraer_datos_climaticos(url, params, api_key, timeout=30):
    """Download the weather timeline for one location and date.

    Args:
        url: Base endpoint; the location and date are appended as path segments.
        params: Mapping with the keys 'locacion' and 'fecha'.
        api_key: Key sent to the service as the ``key`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body when the service answers with HTTP 200, or
        ``None`` when the request fails or any other status code comes back.
    """
    # Percent-encode the location so names with spaces or accents still form a valid URL.
    # Commas stay literal because they separate parts of a location such as "city,country".
    locacion = quote(str(params['locacion']), safe=',')
    fecha = params['fecha']
    url_final = f'{url}{locacion}/{fecha}'

    try:
        # The key travels as a query parameter so requests takes care of encoding it;
        # the timeout keeps the notebook from hanging on an unresponsive server.
        response = requests.get(url_final, params={'key': api_key}, timeout=timeout)

        if response.status_code == 200:
            return response.json()

        # Report the failure instead of returning None without any explanation.
        print(f'Error HTTP {response.status_code}')
        return None

    except requests.exceptions.RequestException as e:
        print(f'Error {e}')
        return None
        
        
def guardar_archivos_datos(data):
    """Write the downloaded data to ``Datos/datos_climaticos_<YYYY-MM-DD>.json``.

    The ``Datos`` folder is created inside the current working directory when
    it is missing, and the file name carries today's date. This function only
    writes the file: it does not stop the Spark session, so Spark DataFrames
    stay usable after it returns.

    Args:
        data: A ``list`` or ``dict`` to serialise. Any other value (for
            example ``None`` after a failed download) is reported and skipped.

    Returns:
        The path of the written file, or ``None`` when nothing was saved.
    """
    # Reject unusable input up front and say so, instead of skipping silently.
    if not isinstance(data, (list, dict)):
        print('Error al guardar los datos: se esperaba una lista o un diccionario')
        return None

    carpeta_archivos = os.path.join(os.getcwd(), 'Datos')
    fecha_actual = datetime.now().strftime('%Y-%m-%d')
    ruta_archivos = os.path.join(carpeta_archivos, f'datos_climaticos_{fecha_actual}.json')

    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(carpeta_archivos, exist_ok=True)

    try:
        # Compact separators keep the file small.
        with open(ruta_archivos, 'w', encoding='utf-8') as archivo_json:
            json.dump(data, archivo_json, separators=(',', ':'))
    except Exception as e:
        print(f'Error al guardar los datos: {e}')
        return None

    print(f'Archivo JSON guardado en: {ruta_archivos}')
    return ruta_archivos
    
# Read the API key from a local text file so the key is not written in the notebook.
api_key = obtener_api_key('api_key.txt')

# Download the weather data; `data` is None when the request raised or did not return HTTP 200.
data = extraer_datos_climaticos(url, params, api_key)

# Save the downloaded payload as a dated JSON file inside the `Datos` folder.
guardar_archivos_datos(data)

Archivo JSON guardado en: c:\Users\dispe\OneDrive\Documentos\CLASES\Proyecto_Procesamiento_Automatizacion\Datos\datos_climaticos_2025-01-20.json


In [62]:
import pyspark.sql.types as f

def generar_esquema(data):
    """Build a Spark ``StructType`` that mirrors the columns of a pandas DataFrame.

    Args:
        data: pandas DataFrame whose column labels and dtypes are inspected.

    Returns:
        A ``StructType`` with one nullable field per column, in column order.
        Dots in a column label are replaced by underscores. ``float64`` maps to
        ``DoubleType``, ``float32`` to ``FloatType``, ``int64`` to ``LongType``
        and ``int32`` to ``IntegerType``; every other dtype (including
        ``object``) maps to ``StringType``.
    """
    # 64-bit pandas dtypes need 64-bit Spark types: FloatType and IntegerType are
    # 32-bit, so they would lose precision or reject large values.
    tipos_por_dtype = {
        'float64': DoubleType,
        'float32': FloatType,
        'int64': LongType,
        'int32': IntegerType,
    }

    fields = []
    for nombre in data.columns:
        # str() keeps non-string labels (e.g. integers) from breaking the rename;
        # dots are replaced because Spark reads them as struct-field access.
        nombre_modificado = str(nombre).replace('.', '_')
        tipo_spark = tipos_por_dtype.get(str(data[nombre].dtype), StringType)
        fields.append(StructField(nombre_modificado, tipo_spark(), True))

    return StructType(fields)

# NOTE(review): `data_df` is not defined in any cell visible here; presumably a pandas
# DataFrame built from the downloaded JSON. Confirm it exists before this cell runs.
schema = generar_esquema(data_df)

In [4]:
from pyspark.sql.functions import col
from pyspark.sql.functions import explode

# Fields of the `currentConditions` struct, in the order they appear in the output.
campos_current_conditions = [
    'cloudcover', 'conditions', 'datetime', 'datetimeEpoch', 'dew',
    'feelslike', 'humidity', 'icon', 'moonphase', 'precip', 'precipprob',
    'preciptype', 'pressure', 'snow', 'snowdepth', 'solarenergy',
    'solarradiation', 'source', 'stations', 'sunrise', 'sunriseEpoch',
    'sunset', 'sunsetEpoch', 'temp', 'uvindex', 'visibility', 'winddir',
    'windgust', 'windspeed',
]

# Promote every struct field to a top-level column that keeps the field's own name.
df_desanidado_currentConditions = df1.select(
    *[col(f'currentConditions.{campo}').alias(campo) for campo in campos_current_conditions]
)

# One row per station id. The DataFrame is assigned BEFORE calling show():
# show() only prints and returns None, so chaining it into the assignment
# would leave the variable holding None.
df_final_currentConditions = (
    df_desanidado_currentConditions
    .withColumn('station', explode('stations'))
    .drop('stations')
)
df_final_currentConditions.show()

+----------+----------------+--------+-------------+----+---------+--------+-----------------+---------+------+----------+----------+--------+----+---------+-----------+--------------+------+--------+------------+--------+-----------+----+-------+----------+-------+--------+---------+-------+
|cloudcover|      conditions|datetime|datetimeEpoch| dew|feelslike|humidity|             icon|moonphase|precip|precipprob|preciptype|pressure|snow|snowdepth|solarenergy|solarradiation|source| sunrise|sunriseEpoch|  sunset|sunsetEpoch|temp|uvindex|visibility|winddir|windgust|windspeed|station|
+----------+----------------+--------+-------------+----+---------+--------+-----------------+---------+------+----------+----------+--------+----+---------+-----------+--------------+------+--------+------------+--------+-----------+----+-------+----------+-------+--------+---------+-------+
|      25.0|Partially cloudy|14:50:00|   1736430600|41.6|     64.5|    43.1|partly-cloudy-day|     0.34|   0.0|       

In [5]:
# One output row per element of the `days` array; the array column itself is dropped.
df_days_exploded = (
    df1
    .withColumn('day', explode('days'))
    .drop('days')
)
df_days_exploded.show()

+-------+------+--------------------+--------------------+--------+---------+---------+---------------+--------------------+-----------+--------+--------------------+
|address|alerts|   currentConditions|         description|latitude|longitude|queryCost|resolvedAddress|            stations|   timezone|tzoffset|                 day|
+-------+------+--------------------+--------------------+--------+---------+---------+---------------+--------------------+-----------+--------+--------------------+
|Sicilia|    []|{25.0, Partially ...|Similar temperatu...| 38.1221|  13.3611|        1|Sicilia, Italia|{{0.0, 54398.0, C...|Europe/Rome|     1.0|{59.2, Partially ...|
+-------+------+--------------------+--------------------+--------+---------+---------+---------------+--------------------+-----------+--------+--------------------+



In [6]:
# Promote every field of the `day` struct to a top-level column named `day_<field>`.
# `hours`, `preciptype` and `stations` are array-typed fields.
df_desanidado_day = df_days_exploded.select(*[
    col(f'day.{campo}').alias(f'day_{campo}')
    for campo in (
        'cloudcover', 'conditions', 'datetime', 'datetimeEpoch', 'description',
        'dew', 'feelslike', 'feelslikemax', 'feelslikemin', 'hours',
        'humidity', 'icon', 'moonphase', 'precip', 'precipcover',
        'precipprob', 'preciptype', 'pressure', 'severerisk', 'snow',
        'snowdepth', 'solarenergy', 'solarradiation', 'source', 'stations',
        'sunrise', 'sunriseEpoch', 'sunset', 'sunsetEpoch', 'temp',
        'tempmax', 'tempmin', 'uvindex', 'visibility', 'winddir',
        'windgust', 'windspeed',
    )
])

# One row per element of `day_hours`; the element lands in `day_hour` and the array is dropped.
df_Day = (
    df_desanidado_day
    .withColumn('day_hour', explode('day_hours'))
    .drop('day_hours')
)
    

In [85]:
# Show the first 5 rows of the exploded hourly DataFrame.
# NOTE(review): `df_explode_DayHour` is assigned in a cell further down, so this cell fails on a top-to-bottom run.
df_explode_DayHour.show(5)

+------------------+------------------+----------------+---------------------+-----------+-----------------+----------------+------------+--------------+------------------+----------------+------------------+------------+-----------------+-------------------+----------------------+--------------+------------+---------------+------------------+---------------+----------------+-----------------+--------------------------+------------------------+
|dayHour_cloudcover|dayHour_conditions|dayHour_datetime|dayHour_datetimeEpoch|dayHour_dew|dayHour_feelslike|dayHour_humidity|dayHour_icon|dayHour_precip|dayHour_precipprob|dayHour_pressure|dayHour_severerisk|dayHour_snow|dayHour_snowdepth|dayHour_solarenergy|dayHour_solarradiation|dayHour_source|dayHour_temp|dayHour_uvindex|dayHour_visibility|dayHour_winddir|dayHour_windgust|dayHour_windspeed|dayHour_preciptype_explode|dayHour_stations_explode|
+------------------+------------------+----------------+---------------------+-----------+------------

In [78]:
# Show the first 5 rows of `df_Day` to inspect the flattened `day_*` columns.
df_Day.show(5)

+--------------+----------------+------------+-----------------+--------------------+-------+-------------+----------------+----------------+------------+-----------------+-------------+----------+---------------+--------------+--------------+------------+--------------+--------+-------------+---------------+------------------+----------+-------------------+-----------+----------------+----------+---------------+--------+-----------+-----------+-----------+--------------+-----------+------------+-------------+--------------------+
|day_cloudcover|  day_conditions|day_datetime|day_datetimeEpoch|     day_description|day_dew|day_feelslike|day_feelslikemax|day_feelslikemin|day_humidity|         day_icon|day_moonphase|day_precip|day_precipcover|day_precipprob|day_preciptype|day_pressure|day_severerisk|day_snow|day_snowdepth|day_solarenergy|day_solarradiation|day_source|       day_stations|day_sunrise|day_sunriseEpoch|day_sunset|day_sunsetEpoch|day_temp|day_tempmax|day_tempmin|day_uvindex|da

In [75]:
# Print the column names and types of `df_Day`.
df_Day.printSchema()

root
 |-- day_cloudcover: double (nullable = true)
 |-- day_conditions: string (nullable = true)
 |-- day_datetime: string (nullable = true)
 |-- day_datetimeEpoch: long (nullable = true)
 |-- day_description: string (nullable = true)
 |-- day_dew: double (nullable = true)
 |-- day_feelslike: double (nullable = true)
 |-- day_feelslikemax: double (nullable = true)
 |-- day_feelslikemin: double (nullable = true)
 |-- day_humidity: double (nullable = true)
 |-- day_icon: string (nullable = true)
 |-- day_moonphase: double (nullable = true)
 |-- day_precip: double (nullable = true)
 |-- day_precipcover: double (nullable = true)
 |-- day_precipprob: double (nullable = true)
 |-- day_preciptype: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- day_pressure: double (nullable = true)
 |-- day_severerisk: double (nullable = true)
 |-- day_snow: double (nullable = true)
 |-- day_snowdepth: double (nullable = true)
 |-- day_solarenergy: double (nullable = true)
 |-- d

In [7]:
# Promote every field of the `day_hour` struct to a top-level column named `dayHour_<field>`.
df_DayHour = df_Day.select(*[
    col(f'day_hour.{campo}').alias(f'dayHour_{campo}')
    for campo in (
        'cloudcover', 'conditions', 'datetime', 'datetimeEpoch', 'dew',
        'feelslike', 'humidity', 'icon', 'precip', 'precipprob',
        'preciptype', 'pressure', 'severerisk', 'snow', 'snowdepth',
        'solarenergy', 'solarradiation', 'source', 'stations', 'temp',
        'uvindex', 'visibility', 'winddir', 'windgust', 'windspeed',
    )
])

In [92]:
# Show the first 5 rows of `df_DayHour` (the flattened `day_hour` fields).
df_DayHour.show(5)

+------------------+------------------+----------------+---------------------+-----------+-----------------+----------------+-------------------+--------------+------------------+------------------+----------------+------------------+------------+-----------------+-------------------+----------------------+--------------+-------------------+------------+---------------+------------------+---------------+----------------+-----------------+
|dayHour_cloudcover|dayHour_conditions|dayHour_datetime|dayHour_datetimeEpoch|dayHour_dew|dayHour_feelslike|dayHour_humidity|       dayHour_icon|dayHour_precip|dayHour_precipprob|dayHour_preciptype|dayHour_pressure|dayHour_severerisk|dayHour_snow|dayHour_snowdepth|dayHour_solarenergy|dayHour_solarradiation|dayHour_source|   dayHour_stations|dayHour_temp|dayHour_uvindex|dayHour_visibility|dayHour_winddir|dayHour_windgust|dayHour_windspeed|
+------------------+------------------+----------------+---------------------+-----------+-----------------+------

In [8]:
from pyspark.sql.functions import explode_outer, when, lit

# Replace NULL in 'dayHour_preciptype' with a one-element placeholder array so
# the row is not lost when the column is exploded below.
df_DayHour_filled = df_DayHour.withColumn(
    'dayHour_preciptype',
    when(
        col('dayHour_preciptype').isNull(),
        lit(['Sin Dato'])
    ).otherwise(col('dayHour_preciptype'))
)

# Explode both array columns into one row per (precipitation type, station) pair.
# `explode` drops rows whose array is NULL or empty. The stations column gets no
# placeholder above, so `explode_outer` is used for it: an hour without stations
# is kept with a NULL station instead of disappearing from the result.
df_explode_DayHour = (
    df_DayHour_filled
    .withColumn('dayHour_preciptype_explode', explode('dayHour_preciptype'))
    .withColumn('dayHour_stations_explode', explode_outer('dayHour_stations'))
    .drop('dayHour_preciptype', 'dayHour_stations')
)

In [9]:
# Explode `day_stations` into one row per station. `df_Day` is reassigned in place,
# so the guard makes a second run of this cell a no-op instead of an error caused by
# the already-dropped `day_stations` column.
if 'day_stations' in df_Day.columns:
    df_Day = (
        df_Day
        .withColumn('day_stations_explode', explode('day_stations'))
        .drop('day_stations')
    )

In [22]:
# Extrae la fecha del archivo JSON para agregar al dataframe de "df_explode_DayHour"
from pyspark.sql.functions import lit
import re

# Raw string: in a normal literal '\d' is an invalid escape sequence, which newer
# Python versions warn about. The path value itself is unchanged.
ruta_archivo = r'Datos\datos_climaticos2025-01-09.json'

# Fail with a clear message when the file name carries no date, instead of an
# AttributeError raised by calling .group() on None.
coincidencia_fecha = re.search(r'\d{4}-\d{2}-\d{2}', ruta_archivo)
if coincidencia_fecha is None:
    raise ValueError(f'No se encontro una fecha AAAA-MM-DD en: {ruta_archivo}')
fecha_extraida = coincidencia_fecha.group(0)

# NOTE(review): the same literal date is written on every row; if the data spans
# several days, the per-day date would have to come from the data itself.
df_explode_DayHour = df_explode_DayHour.withColumn('day_datetime', lit(fecha_extraida))

In [23]:
# Display `df_explode_DayHour`, which now includes the added `day_datetime` column.
df_explode_DayHour.show()

+------------------+------------------+----------------+---------------------+-----------+-----------------+----------------+-------------------+--------------+------------------+----------------+------------------+------------+-----------------+-------------------+----------------------+--------------+------------+---------------+------------------+---------------+----------------+-----------------+--------------------------+------------------------+------------+
|dayHour_cloudcover|dayHour_conditions|dayHour_datetime|dayHour_datetimeEpoch|dayHour_dew|dayHour_feelslike|dayHour_humidity|       dayHour_icon|dayHour_precip|dayHour_precipprob|dayHour_pressure|dayHour_severerisk|dayHour_snow|dayHour_snowdepth|dayHour_solarenergy|dayHour_solarradiation|dayHour_source|dayHour_temp|dayHour_uvindex|dayHour_visibility|dayHour_winddir|dayHour_windgust|dayHour_windspeed|dayHour_preciptype_explode|dayHour_stations_explode|day_datetime|
+------------------+------------------+----------------+------

In [16]:
# Count the rows of `df_Day`.
df_Day.count()

72

In [21]:
# Display the hourly rows sorted by `dayHour_datetime` in descending order.
df_explode_DayHour.orderBy(col('dayHour_datetime').desc()).show()

+------------------+------------------+----------------+---------------------+-----------+-----------------+----------------+-----------------+--------------+------------------+----------------+------------------+------------+-----------------+-------------------+----------------------+--------------+------------+---------------+------------------+---------------+----------------+-----------------+--------------------------+------------------------+
|dayHour_cloudcover|dayHour_conditions|dayHour_datetime|dayHour_datetimeEpoch|dayHour_dew|dayHour_feelslike|dayHour_humidity|     dayHour_icon|dayHour_precip|dayHour_precipprob|dayHour_pressure|dayHour_severerisk|dayHour_snow|dayHour_snowdepth|dayHour_solarenergy|dayHour_solarradiation|dayHour_source|dayHour_temp|dayHour_uvindex|dayHour_visibility|dayHour_winddir|dayHour_windgust|dayHour_windspeed|dayHour_preciptype_explode|dayHour_stations_explode|
+------------------+------------------+----------------+---------------------+-----------+--

In [None]:
# Scratch cell: the only live statement explodes `days`; the struct-flattening code below is commented out.
from pyspark.sql.functions import col
from pyspark.sql.functions import explode

# Explode the array-typed column `days` into one row per element.
df_days_exploded = df1.withColumn("day", explode(col("days"))).drop("days")
#df_alerts_exploded = df1.withColumn('alert', explode(col('alerts'))).drop('alerts')
# Flattening of struct-typed columns (kept commented out)
#df_desanidado_currentConditions = df1.select(
#     col('currentConditions.cloudcover').alias('cloudcover'),
#     col('currentConditions.conditions').alias('conditions'),
#     col('currentConditions.datetime').alias('datetime'),
#     col('currentConditions.datetimeEpoch').alias('datetimeEpoch'),
#     col('currentConditions.dew').alias('dew'),
#     col('currentConditions.feelslike').alias('feelslike'),
#     col('currentConditions.humidity').alias('humidity'),
#     col('currentConditions.icon').alias('icon'),
#     col('currentConditions.moonphase').alias('moonphase'),
#     col('currentConditions.precip').alias('precip'),
#     col('currentConditions.precipprob').alias('precipprob'),
#     col('currentConditions.preciptype').alias('preciptype'),
#     col('currentConditions.pressure').alias('pressure'),
#     col('currentConditions.snow').alias('snow'),
#     col('currentConditions.snowdepth').alias('snowdepth'),
#     col('currentConditions.solarenergy').alias('solarenergy'),
#     col('currentConditions.solarradiation').alias('solarradiation'),
#     col('currentConditions.source').alias('source'),
#     col('currentConditions.stations').alias('stations'),
#     col('currentConditions.sunrise').alias('sunrise'),
#     col('currentConditions.sunriseEpoch').alias('sunriseEpoch'),
#     col('currentConditions.sunset').alias('sunset'),
#     col('currentConditions.sunsetEpoch').alias('sunsetEpoch'),
#     col('currentConditions.temp').alias('temp'),
#     col('currentConditions.uvindex').alias('uvindex'),
#     col('currentConditions.visibility').alias('visibility'),
#     col('currentConditions.winddir').alias('winddir'),
#     col('currentConditions.windgust').alias('windgust'),
#     col('currentConditions.windspeed').alias('windspeed') 
# )

#df_desanidado_days = df_days_exploded.select(
#     col("day.cloudcover").alias("day_cloudcover"),
#     col("day.conditions").alias("conditions"),
#     col("day.datetime").alias("datetime"),
#     col("day.datetimeEpoch").alias("day_datetimeEpoch"),
#     col("day.description").alias("day_description"),
#     col("day.dew").alias("day_dew"),
#     col("day.feelslike").alias("day_feelslike"),
#     col("day.feelslikemax").alias("day_feelslikemax"),
#     col("day.feelslikemin").alias("day_feelslikemin"),
#     col("day.hours").alias("day_hours"), # ARRAY
#     col("day.humidity").alias("day_humidity"),
#     col("day.icon").alias("day_icon"),
#     col("day.moonphase").alias("day_moonphase"),
#     col("day.precip").alias("day_precip"),
#     col("day.precipcover").alias("day_precipcover"),
#     col("day.precipprob").alias("day_precipprob"),
#     col("day.preciptype").alias("day_preciptype"), # ARRAY
#     col("day.pressure").alias("day_pressure"),
#     col("day.severerisk").alias("day_severerisk"),
#     col("day.snow").alias("day_snow"),
#     col("day.snowdepth").alias("day_snowdepth"),
#     col("day.solarenergy").alias("day_solarenergy"),
#     col("day.solarradiation").alias("day_solarradiation"),
#     col("day.source").alias("day_source"),
#     col("day.stations").alias("day_stations"), # ARRAY
#     col("day.sunrise").alias("day_sunrise"),
#     col("day.sunriseEpoch").alias("day_sunriseEpoch"),
#     col("day.sunset").alias("day_sunset"),
#     col("day.sunsetEpoch").alias("day_sunsetEpoch"),
#     col("day.temp").alias("day_temp"),
#     col("day.tempmax").alias("day_tempmax"),
#     col("day.tempmin").alias("day_tempmin"),
#     col("day.uvindex").alias("day_uvindex"),
#     col("day.visibility").alias("day_visibility"),
#     col("day.winddir").alias("day_winddir"),
#     col("day.windgust").alias("day_windgust"),
#     col("day.windspeed").alias("day_windspeed")
# )

#df_dayHours_exploded = df_desanidado_days.withColumn('day_hour', explode(col('day_hours'))).drop('day_hours')

# df_desanidado_dayHours = df_dayHours_exploded.select(
#     col("day_hour.cloudcover").alias("day_cloudcover"),
#     col("day_hour.cloudcover").alias("day_cloudcover"),
#     col("day_hour.cloudcover").alias("day_cloudcover"),
#     col("day_hour.cloudcover").alias("day_cloudcover"),
#     col("day_hour.cloudcover").alias("day_cloudcover"),
#     col("day_hour.cloudcover").alias("day_cloudcover"),
#     col("day_hour.cloudcover").alias("day_cloudcover"),
#     col("day_hour.cloudcover").alias("day_cloudcover"),
#     col("day_hour.cloudcover").alias("day_cloudcover"),
#     col("day_hour.cloudcover").alias("day_cloudcover"),
#     col("day_hour.cloudcover").alias("day_cloudcover")
# )
# TODO: review the ChatGPT answer and the functions it provided

In [8]:
# Print the schema of `df_days_exploded` to inspect its nested struct and array columns.
df_days_exploded.printSchema()

root
 |-- address: string (nullable = true)
 |-- alerts: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- currentConditions: struct (nullable = true)
 |    |-- cloudcover: double (nullable = true)
 |    |-- conditions: string (nullable = true)
 |    |-- datetime: string (nullable = true)
 |    |-- datetimeEpoch: long (nullable = true)
 |    |-- dew: double (nullable = true)
 |    |-- feelslike: double (nullable = true)
 |    |-- humidity: double (nullable = true)
 |    |-- icon: string (nullable = true)
 |    |-- moonphase: double (nullable = true)
 |    |-- precip: double (nullable = true)
 |    |-- precipprob: double (nullable = true)
 |    |-- preciptype: string (nullable = true)
 |    |-- pressure: double (nullable = true)
 |    |-- snow: double (nullable = true)
 |    |-- snowdepth: double (nullable = true)
 |    |-- solarenergy: double (nullable = true)
 |    |-- solarradiation: double (nullable = true)
 |    |-- source: string (nullable = true)
 |    

In [7]:
# NOTE(review): the only assignment of `df_dayHours_exploded` visible in this notebook is
# commented out, so this cell depends on leftover kernel state and fails on a fresh run.
df_dayHours_exploded.show()

+--------------+----------------+----------+-----------------+--------------------+-------+-------------+----------------+----------------+------------+-----------------+-------------+----------+---------------+--------------+--------------+------------+--------------+--------+-------------+---------------+------------------+----------+-------------------+-----------+----------------+----------+---------------+--------+-----------+-----------+-----------+--------------+-----------+------------+-------------+--------------------+
|day_cloudcover|      conditions|  datetime|day_datetimeEpoch|     day_description|day_dew|day_feelslike|day_feelslikemax|day_feelslikemin|day_humidity|         day_icon|day_moonphase|day_precip|day_precipcover|day_precipprob|day_preciptype|day_pressure|day_severerisk|day_snow|day_snowdepth|day_solarenergy|day_solarradiation|day_source|       day_stations|day_sunrise|day_sunriseEpoch|day_sunset|day_sunsetEpoch|day_temp|day_tempmax|day_tempmin|day_uvindex|day_vi

In [None]:
# Flatten a single field as a trial. `df_days_exploded` has no `days` column (it was
# exploded into the `day` struct and dropped), and `element` is only the label that
# printSchema() shows for array items, so the field is read from `day` instead.
df_desanidado_days = df_days_exploded.select(
    col('day.cloudcover').alias('cloudcover')
)

In [18]:
# NOTE(review): `df1` is not created in any cell visible here; confirm where it is loaded
# so the notebook can run from a fresh kernel.
df1.show()

+----------+----------------+--------+-------------+----+---------+--------+-----------------+---------+------+----------+----------+--------+----+---------+-----------+--------------+------+--------------------+--------+------------+--------+-----------+----+-------+----------+-------+--------+---------+
|cloudcover|      conditions|datetime|datetimeEpoch| dew|feelslike|humidity|             icon|moonphase|precip|precipprob|preciptype|pressure|snow|snowdepth|solarenergy|solarradiation|source|            stations| sunrise|sunriseEpoch|  sunset|sunsetEpoch|temp|uvindex|visibility|winddir|windgust|windspeed|
+----------+----------------+--------+-------------+----+---------+--------+-----------------+---------+------+----------+----------+--------+----+---------+-----------+--------------+------+--------------------+--------+------------+--------+-----------+----+-------+----------+-------+--------+---------+
|      25.0|Partially cloudy|14:50:00|   1736430600|41.6|     64.5|    43.1|par

In [None]:
# DESANIDAR UNA SOLA COLUMNA (REALIZARLO INDIVIDUALMENTE NO TODO EL DF) Y VER COMO RESULTA. 
# BUSCAR EN DISTINTAS FUENTES, NO SOLO CHATGPT