In [14]:
import csv
import json
import os
import psycopg2
from datetime import datetime

In [15]:
# Root directory of the GPS dumps; its sub-folders are scanned below.
master_folder_path = r'/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps'

# Full paths of every entry of the root directory that is itself a directory.
# The order is whatever os.listdir returns (it is not sorted).
sub_folders_list = list(
    filter(
        os.path.isdir,
        (os.path.join(master_folder_path, entry) for entry in os.listdir(master_folder_path)),
    )
)

sub_folders_list

['/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-29',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-05-02',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-28',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-05-03',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-30',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-05-06',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-25',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-05-10',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-05-08',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-26',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-

In [16]:
def epoch_to_timestamp(epoch_ms):
    """Format an epoch value in milliseconds as 'YYYY-MM-DD HH:MM:SS'.

    The argument is passed through int(), so numeric strings are accepted.
    datetime.fromtimestamp is called without a tz argument, so the result is
    expressed in the local time zone of the machine running the code.
    Sub-second precision is dropped by the output format.
    """
    seconds_since_epoch = int(epoch_ms) / 1000
    moment = datetime.fromtimestamp(seconds_since_epoch)
    return moment.strftime('%Y-%m-%d %H:%M:%S')

In [17]:
# Bus line identifiers kept when converting the JSON dumps to CSV; records
# whose 'linha' is not listed here are skipped by create_csv_file.
# NOTE(review): the original comment hints the list came from a
# "SELECT DISTINCT ... FROM" query — the source table is not shown; confirm.
USED_BUS_LINES = [
    "100", "108", "232", "2336", "2803", "292", "298", "3",
    "309", "315", "324", "328", "343", "355", "371", "388",
    "397", "399", "415", "422", "457", "483", "497", "550",
    "553", "554", "557", "565", "606", "624", "629", "634",
    "638", "639", "665", "756", "759", "774", "779", "803",
    "838", "852", "864", "867", "878", "905", "917", "918",
]

# Column order shared by the CSV header row and the COPY statement.
TABLE_COLUMNS = [
    # Values copied from each record (latitude/longitude with '.' decimals).
    "ordem",
    "latitude",
    "longitude",
    "datahora",
    "velocidade",
    "linha",
    "datahoraenvio",
    "datahoraservidor",
    # Formatted timestamps derived from the three epoch fields above.
    "datahora_ts",
    "datahoraenvio_ts",
    "datahoraservidor_ts",
]

In [18]:
def create_csv_file(input_json_path):
    """Convert one GPS JSON dump into a CSV restricted to USED_BUS_LINES.

    The CSV is written next to the input file, with only the file extension
    replaced by ``.csv``. The first row is TABLE_COLUMNS; each following row
    holds the record's fields plus three formatted timestamps derived from
    its epoch-millisecond fields.

    Args:
        input_json_path: Path to a JSON file holding a list of records. Each
            record is indexed by 'ordem', 'latitude', 'longitude',
            'datahora', 'velocidade', 'linha', 'datahoraenvio' and
            'datahoraservidor'.

    Returns:
        The path of the CSV file that was written.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input is not valid JSON.
        KeyError: If a record lacks 'linha', or a kept record lacks one of
            the other fields listed above.
    """
    print(f"Creating csv for {input_json_path}")
    # JSON interchange files are UTF-8; do not depend on the locale default.
    with open(input_json_path, encoding='utf-8') as json_file:
        data = json.load(json_file)

    # Swap only the extension. A plain str.replace("json", "csv") would also
    # rewrite any "json" inside directory names, and would leave a path with
    # no lowercase "json" unchanged, so the CSV would overwrite the input.
    csv_path = os.path.splitext(input_json_path)[0] + ".csv"

    # Hoisted set for constant-time membership tests inside the loop.
    # Assumes 'linha' values are hashable (strings in the data) — TODO confirm.
    wanted_lines = frozenset(USED_BUS_LINES)

    # The CSV keeps the platform default encoding so that it matches how the
    # file is opened again when it is loaded into the database.
    with open(csv_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(TABLE_COLUMNS)
        for item in data:
            if item['linha'] not in wanted_lines:
                continue
            # Coordinates use a decimal comma in the source; convert to a dot.
            latitude = item['latitude'].replace(',', '.')
            longitude = item['longitude'].replace(',', '.')
            datahora_ts = epoch_to_timestamp(item["datahora"])
            datahoraenvio_ts = epoch_to_timestamp(item["datahoraenvio"])
            datahoraservidor_ts = epoch_to_timestamp(item["datahoraservidor"])
            # Field order must match TABLE_COLUMNS.
            writer.writerow([item['ordem'], latitude, longitude, item['datahora'],
                             item['velocidade'], item['linha'], item['datahoraenvio'],
                             item['datahoraservidor'], datahora_ts, datahoraenvio_ts, datahoraservidor_ts])

    return csv_path

In [19]:
# Collect the CSV path that corresponds to every 2024*.json dump found in the
# daily sub-folders, generating the CSV when it does not exist yet.
created_csv = []
for folder in sub_folders_list:
    for filename in os.listdir(folder):
        # Require a real ".json" extension, not just a name ending in "json".
        if filename.startswith('2024') and filename.endswith('.json'):
            file_path = os.path.join(folder, filename)
            # Replace only the extension; str.replace("json", "csv") would
            # also rewrite any "json" appearing in a directory name.
            csv_path = os.path.splitext(file_path)[0] + '.csv'
            created_csv.append(csv_path)
            # Only build missing CSVs so that a fresh Restart-and-Run-All
            # works while re-runs stay cheap. A CSV left incomplete by an
            # interrupted run must be deleted by hand to be regenerated.
            if not os.path.exists(csv_path):
                create_csv_file(file_path)

In [20]:
# Display the collected CSV paths that the load step below iterates over.
created_csv

['/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_14.csv',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_22.csv',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_06.csv',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_23.csv',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_19.csv',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_07.csv',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_02.csv',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_13.csv',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_09.csv',
 '/home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/tas

In [21]:
def copy_data_from_csv_to_dados_treino(csv_file_path, conn):
    """Bulk-load one CSV file into the ``dados_gps`` table using COPY.

    Despite the function name, the target table in the COPY statement is
    ``dados_gps``. The file's header row is skipped (CSV HEADER) and columns
    are mapped in TABLE_COLUMNS order. The load is committed on success; on
    any error the message is printed and the transaction is rolled back, so
    one failing file does not abort the caller's loop.

    Args:
        csv_file_path: Path of the CSV file to load.
        conn: Open database connection providing cursor(), commit() and
            rollback() (a psycopg2 connection in this notebook).

    Returns:
        True if the file was loaded and committed, False if an error was
        caught and the transaction rolled back.
    """
    cursor = None
    try:
        cursor = conn.cursor()
        col_order = ', '.join(map(str, TABLE_COLUMNS))
        copy_sql = f"COPY dados_gps ({col_order}) FROM STDIN DELIMITER ',' CSV HEADER"
        with open(csv_file_path, 'r') as f:
            cursor.copy_expert(copy_sql, f)
        conn.commit()
        print(f"Data loaded from {csv_file_path} successfully")
        return True
    except Exception as e:
        # Deliberate best-effort: report, undo the partial load, keep going.
        print(f"Error: {e}")
        conn.rollback()
        return False
    finally:
        # Release the cursor on every path; previously it was never closed.
        if cursor is not None:
            cursor.close()

In [22]:
# Load every collected CSV into PostgreSQL over a single connection.
# Bind conn before the try so the finally clause cannot hit a NameError when
# psycopg2.connect itself fails on a fresh kernel.
conn = None
try:
    conn = psycopg2.connect(
        host='localhost',
        database='gps_onibus_rj',
        user='postgres',
        # Allow overriding the password through the environment instead of
        # editing the notebook; the fallback keeps the previous local value.
        password=os.environ.get('PGPASSWORD', 'admin'),
    )
    for file_path in created_csv:
        copy_data_from_csv_to_dados_treino(file_path, conn)

except psycopg2.Error as e:
    print(f"Error connecting to PostgreSQL database: {e}")

finally:
    if conn is not None:
        conn.close()

Data loaded from /home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_14.csv successfully
Data loaded from /home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_22.csv successfully
Data loaded from /home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_06.csv successfully
Data loaded from /home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_23.csv successfully
Data loaded from /home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_19.csv successfully
Data loaded from /home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_07.csv successfully
Data loaded from /home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/2024-04-27/2024-04-27_02.csv successfully
Data loaded from /home/victor/Documents/UFRJ/Data Mining/coc602-data-mining/task_03/gps/20