In [4]:
import os
import pandas as pd
import exifread
from hachoir.parser import createParser
from hachoir.metadata import extractMetadata
from fractions import Fraction

def get_exif_data(image_path):
    """Extract GPS position, altitude and capture time from an image's EXIF tags.

    Parameters
    ----------
    image_path : str
        Path to the image file. It is opened in binary mode and handed to
        ``exifread.process_file``.

    Returns
    -------
    dict
        Keys ``file_name``, ``latitude``, ``longitude``, ``altitude`` and
        ``date_time``. Latitude/longitude are signed decimal degrees (south
        and west are negative), altitude is a float (negated when
        ``GPS GPSAltitudeRef`` is 1, i.e. below sea level) and ``date_time``
        is the ``EXIF DateTimeOriginal`` tag rendered as a string. Any value
        that is absent or cannot be parsed is left as ``None`` instead of
        raising, so one odd file does not abort a whole folder scan.

    Raises
    ------
    OSError
        If ``image_path`` cannot be opened.
    """
    exif_data = {
        "file_name": os.path.basename(image_path),
        "latitude": None,
        "longitude": None,
        "altitude": None,
        "date_time": None,
    }

    def convert_to_degrees(value):
        """Turn a '[deg, min, sec]' rational triple into decimal degrees, or None."""
        parts = str(value).strip('[]').replace(' ', '').split(',')
        try:
            # Unpacking fails with ValueError when fewer than three parts exist;
            # Fraction raises ValueError on garbage and ZeroDivisionError on 'x/0'.
            d, m, s = (float(Fraction(part)) for part in parts[:3])
        except (ValueError, ZeroDivisionError):
            return None
        return d + (m / 60.0) + (s / 3600.0)

    def hemisphere_sign(ref_tag, negative_letter):
        """Return -1 when the reference tag names the negative hemisphere, else 1."""
        ref_values = ref_tag.values
        first = ref_values[0] if len(ref_values) else ''
        return -1 if str(first).strip().upper() == negative_letter else 1

    # Only the tag parsing needs the file handle; everything below works on `tags`.
    with open(image_path, 'rb') as img_file:
        tags = exifread.process_file(img_file)

    gps_latitude = tags.get("GPS GPSLatitude")
    gps_latitude_ref = tags.get("GPS GPSLatitudeRef")
    gps_longitude = tags.get("GPS GPSLongitude")
    gps_longitude_ref = tags.get("GPS GPSLongitudeRef")
    gps_altitude = tags.get("GPS GPSAltitude")
    gps_altitude_ref = tags.get("GPS GPSAltitudeRef")
    date_time_original = tags.get("EXIF DateTimeOriginal")

    if gps_latitude and gps_latitude_ref and gps_longitude and gps_longitude_ref:
        lat = convert_to_degrees(gps_latitude)
        lon = convert_to_degrees(gps_longitude)

        # Report a position only when both coordinates parsed cleanly.
        if lat is not None and lon is not None:
            exif_data["latitude"] = lat * hemisphere_sign(gps_latitude_ref, 'S')
            exif_data["longitude"] = lon * hemisphere_sign(gps_longitude_ref, 'W')

    if gps_altitude:
        try:
            altitude = float(gps_altitude.values[0])
        except (IndexError, TypeError, ValueError, ZeroDivisionError):
            # Empty value list or a rational with a zero denominator.
            altitude = None

        if altitude is not None:
            below_sea_level = False
            if gps_altitude_ref is not None:
                try:
                    # EXIF stores the altitude as an absolute value; ref 1 = below sea level.
                    below_sea_level = int(gps_altitude_ref.values[0]) == 1
                except (IndexError, TypeError, ValueError):
                    below_sea_level = False
            exif_data["altitude"] = -altitude if below_sea_level else altitude

    if date_time_original:
        exif_data["date_time"] = str(date_time_original)

    return exif_data

def get_video_metadata(video_path):
    """Extract creation date and GPS coordinates from a video via hachoir.

    Parameters
    ----------
    video_path : str
        Path to the video file, passed to hachoir's ``createParser``.

    Returns
    -------
    dict
        Keys ``file_name``, ``latitude``, ``longitude``, ``altitude`` and
        ``date_time``. ``altitude`` is never filled in by this function.
        Values stay ``None`` when hachoir cannot parse the file, yields no
        metadata, or a metadata line does not have the expected layout.
    """
    video_data = {
        "file_name": os.path.basename(video_path),
        "latitude": None,
        "longitude": None,
        "altitude": None,
        "date_time": None,
    }

    parser = createParser(video_path)
    if parser is None:
        # hachoir did not recognise the file; report it with empty metadata.
        return video_data

    # The parser keeps the underlying file open; the context manager closes it.
    with parser:
        metadata = extractMetadata(parser)

    if not metadata:
        return video_data

    # exportPlaintext() may return None instead of an empty list when nothing was extracted.
    for item in metadata.exportPlaintext() or []:
        _, separator, value = item.partition(": ")
        if not separator:
            # Title line or any line without a "key: value" shape.
            continue
        value = value.strip()

        if "Creation date" in item:
            video_data["date_time"] = value

        if "GPS coordinates" in item:
            # NOTE(review): assumes a "<lat> <N|S>, <lon> <E|W>" layout — confirm
            # against real hachoir output for these files.
            try:
                lat_text, lon_text = value.split(", ")[:2]
                lat_value, lat_ref = lat_text.split(" ")[:2]
                lon_value, lon_ref = lon_text.split(" ")[:2]
                latitude = float(lat_value) * (-1 if lat_ref == 'S' else 1)
                longitude = float(lon_value) * (-1 if lon_ref == 'W' else 1)
            except ValueError:
                # Unexpected layout: leave the coordinates unset rather than crash.
                continue
            video_data["latitude"] = latitude
            video_data["longitude"] = longitude

    return video_data

def process_files(folder):
    """Collect one metadata record per supported media file found in ``folder``.

    Entries are matched case-insensitively by extension: ``.jpg``, ``.jpeg``
    and ``.png`` go through ``get_exif_data``; ``.mov`` goes through
    ``get_video_metadata``. Anything else is ignored. Records are returned
    in the order ``os.listdir`` yields the entries.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    video_suffix = '.mov'

    records = []
    for entry in os.listdir(folder):
        lowered = entry.lower()
        full_path = os.path.join(folder, entry)
        if lowered.endswith(image_suffixes):
            records.append(get_exif_data(full_path))
        elif lowered.endswith(video_suffix):
            records.append(get_video_metadata(full_path))

    return records

# Path to the folder holding the photos and videos
# NOTE(review): absolute, machine-specific path — this cell only runs where this folder exists.
folder_path = 'C:/Users/34670/Desktop/python/coast_to_coast/viaje_usa/fotos_usa'

# Process the files and build the DataFrame (one row per record returned by process_files)
data_list = process_files(folder_path)
df_fotos_videos = pd.DataFrame(data_list)

# Show the first rows of the DataFrame
df_fotos_videos.head()


PNG file does not have exif data.
PNG file does not have exif data.
PNG file does not have exif data.


Unnamed: 0,file_name,latitude,longitude,altitude,date_time
0,IMG_3600.JPG,40.6465,-73.789667,,2013:07:28 19:02:00
1,IMG_3603.JPG,40.703667,-74.097,,2013:07:29 10:05:23
2,IMG_3605.JPG,40.690667,-74.0425,2.444101,2013:07:29 10:07:50
3,IMG_3607.JPG,40.690167,-74.042667,6.688098,2013:07:29 10:08:16
4,IMG_3613.JPG,40.689,-74.043667,22.880661,2013:07:29 10:21:47


In [5]:
# Display the whole DataFrame built in the previous cell
df_fotos_videos

Unnamed: 0,file_name,latitude,longitude,altitude,date_time
0,IMG_3600.JPG,40.646500,-73.789667,,2013:07:28 19:02:00
1,IMG_3603.JPG,40.703667,-74.097000,,2013:07:29 10:05:23
2,IMG_3605.JPG,40.690667,-74.042500,2.444101,2013:07:29 10:07:50
3,IMG_3607.JPG,40.690167,-74.042667,6.688098,2013:07:29 10:08:16
4,IMG_3613.JPG,40.689000,-74.043667,22.880661,2013:07:29 10:21:47
...,...,...,...,...,...
903,IMG_4931.JPG,37.809667,-122.410333,11.170458,2013:08:15 21:39:50
904,IMG_4932.JPG,37.778333,-122.391500,,2013:08:15 22:51:22
905,IMG_4933.JPG,37.778333,-122.391500,,2013:08:15 22:51:22
906,IMG_4934.JPG,37.628667,-122.400667,28.823656,2013:08:15 23:19:45


In [11]:
# Load the metadata CSV into a DataFrame and preview the first rows
# NOTE(review): absolute, machine-specific path — this cell only runs where this file exists.
df = pd.read_csv('C:/Users/34670/Desktop/python/coast_to_coast/viaje_usa/data/metadata_singps.csv')
df.head()

Unnamed: 0,file_name,date_time
0,IMG_5457.JPG,2013:07:27 20:07:08
1,IMG_5460.JPG,2013:07:28 09:34:27
2,IMG_5461.JPG,2013:07:29 01:55:50
3,IMG_5464.JPG,2013:07:29 01:58:11
4,IMG_5465.JPG,2013:07:29 02:20:45


In [12]:
# Parse the EXIF-style timestamps ("YYYY:MM:DD HH:MM:SS") into datetime values
df['date_time'] = pd.to_datetime(df['date_time'], format='%Y:%m:%d %H:%M:%S')

# Sort chronologically so calendar dates appear in trip order
df = df.sort_values(by='date_time').reset_index(drop=True)

# Distinct calendar dates, in order of appearance (chronological after the sort)
fechas_unicas = df['date_time'].dt.date.unique()

# Label each date 'dia_1', 'dia_2', ... in chronological order.
# Missing timestamps are skipped here, so their rows get NaN in 'jornada'.
jornada_por_fecha = {
    fecha: f'dia_{idx + 1}'
    for idx, fecha in enumerate(fechas_unicas)
    if not pd.isna(fecha)
}

# Build the column in one step as strings. The previous version created it
# as integer 0 and then wrote strings into it, which triggers pandas'
# incompatible-dtype warning; it also scanned the frame once per date.
df['jornada'] = df['date_time'].dt.date.map(jornada_por_fecha)

# Show the DataFrame with the new column
df.head(10)


  df.loc[df['date_time'].dt.date == fecha, 'jornada'] = f'dia_{idx + 1}'


Unnamed: 0,file_name,date_time,jornada
0,IMG_5457.JPG,2013-07-27 20:07:08,dia_1
1,IMG_5460.JPG,2013-07-28 09:34:27,dia_2
2,IMG_5461.JPG,2013-07-29 01:55:50,dia_3
3,IMG_5464.JPG,2013-07-29 01:58:11,dia_3
4,IMG_5465.JPG,2013-07-29 02:20:45,dia_3
5,IMG_5467.JPG,2013-07-29 02:22:28,dia_3
6,IMG_5469.JPG,2013-07-29 02:26:44,dia_3
7,IMG_5474.JPG,2013-07-29 02:59:51,dia_3
8,IMG_5480.JPG,2013-07-29 03:05:52,dia_3
9,IMG_5483.JPG,2013-07-29 03:14:26,dia_3


In [13]:

# Number of files per jornada (value_counts orders by frequency, not by day)
conteo_jornadas = df['jornada'].value_counts()

# Turn the counts into a two-column DataFrame ordered by day number.
# The day number is extracted only to sort numerically ('dia_10' after
# 'dia_9') and is dropped again afterwards.
conteo_jornadas_df = (
    conteo_jornadas
    .rename_axis('jornada')
    .reset_index(name='conteo')
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    .assign(numero=lambda tabla: tabla['jornada'].str.extract(r'(\d+)', expand=False).astype(int))
    .sort_values(by='numero')
    .drop(columns='numero')
    .reset_index(drop=True)
)

# Show the ordered DataFrame
conteo_jornadas_df


Unnamed: 0,jornada,conteo
0,dia_1,1
1,dia_2,1
2,dia_3,60
3,dia_4,126
4,dia_5,178
5,dia_6,56
6,dia_7,85
7,dia_8,105
8,dia_9,33
9,dia_10,208
