#### Importing librairies

In [2]:
import pandas as pd
import datetime
from sqlalchemy import create_engine

#### Importing and processing incidents

In [4]:
start_incidents = pd.read_csv("start_incidents/start_incidents_2018_01_01_2023_08_28.csv")
start_incidents = start_incidents[['UserName','Timestamp', 'Embedded_text','Emojis','Tweet URL']]
start_incidents.insert(0, 'start/end', 'start')
start_incidents['Timestamp'] = pd.to_datetime(start_incidents['Timestamp']) + datetime.timedelta(hours=1) # Add one hour to Timestamp in order to convert UTC to Paris time
start_incidents.head()

Unnamed: 0,start/end,UserName,Timestamp,Embedded_text,Emojis,Tweet URL
0,start,@Ligne9_RATP,2018-01-01 02:56:08+00:00,"02:53, la rame stationne à Nation en dir. de M...",,https://twitter.com/Ligne9_RATP/status/9476475...
1,start,@Ligne9_RATP,2018-01-01 19:14:08+00:00,"19:11, la rame stationne à Cx Chavaux en dir. ...",,https://twitter.com/Ligne9_RATP/status/9478937...
2,start,@Ligne9_RATP,2018-01-02 12:06:09+00:00,"12:02, le trafic est interrompu entre Rue des ...",,https://twitter.com/Ligne9_RATP/status/9481484...
3,start,@Ligne9_RATP,2018-01-02 18:05:08+00:00,"18:03, le trafic est perturbé sur la ligne (in...",,https://twitter.com/Ligne9_RATP/status/9482387...
4,start,@Ligne9_RATP,2018-01-02 23:38:08+00:00,"23:35, le trafic est interrompu entre Trocader...",,https://twitter.com/Ligne9_RATP/status/9483225...


In [7]:
end_incidents = pd.read_csv("end_incidents/end_incidents_2018_01_01_2023_08_28.csv")
end_incidents = end_incidents[['UserName','Timestamp', 'Embedded_text','Emojis','Tweet URL']]
end_incidents.insert(0, 'start/end', 'end')
end_incidents['Timestamp'] = pd.to_datetime(end_incidents['Timestamp']) + datetime.timedelta(hours=1) # Add one hour to Timestamp in order to convert UTC to Paris time
end_incidents.head()

Unnamed: 0,start/end,UserName,Timestamp,Embedded_text,Emojis,Tweet URL
0,end,@Ligne9_RATP,2018-01-01 02:22:11+00:00,"Incident terminé (personne sur les voies), ret...",,https://twitter.com/Ligne9_RATP/status/9476390...
1,end,@Ligne9_RATP,2018-01-01 02:46:17+00:00,Retour à un trafic régulier sur l'ensemble de ...,,https://twitter.com/Ligne9_RATP/status/9476451...
2,end,@Ligne9_RATP,2018-01-02 12:11:08+00:00,"12:09, le trafic reprend progressivement (pann...",,https://twitter.com/Ligne9_RATP/status/9481496...
3,end,@Ligne9_RATP,2018-01-02 12:24:06+00:00,Retour à un trafic régulier sur l'ensemble de ...,,https://twitter.com/Ligne9_RATP/status/9481529...
4,end,@Ligne9_RATP,2018-01-02 18:26:07+00:00,Incident terminé.Retour à un trafic normal sur...,,https://twitter.com/Ligne9_RATP/status/9482440...


#### Merging start and end incidents

In [11]:
incidents = pd.concat([start_incidents, end_incidents])
incidents["Timestamp"] = incidents["Timestamp"].apply(lambda x: str(x))
incidents.sort_values(by=['Timestamp'], inplace=True)
incidents.reset_index(inplace=True)
incidents.drop(columns=['index'], inplace=True)
incidents["date"] = incidents["Timestamp"].apply(lambda x: x.split(" ")[0])

### Cleaning columns names

['start/end', 'UserName', 'Timestamp', 'Embedded_text', 'Emojis','Tweet URL', 'date'] -> ['start_end', 'username', 'timestamp', 'embedded_text', 'emojis', 'tweet_url', 'date']

In [13]:
incidents.columns = ['start_end', 'username', 'timestamp', 'embedded_text', 'emojis', 'tweet_url', 'date']

In [14]:
incidents.head()

Unnamed: 0,index,start_end,username,timestamp,embedded_text,emojis,tweet_url,date
0,0,end,@Ligne9_RATP,2018-01-01 02:22:11+00:00,"Incident terminé (personne sur les voies), ret...",,https://twitter.com/Ligne9_RATP/status/9476390...,2018-01-01
1,1,end,@Ligne9_RATP,2018-01-01 02:46:17+00:00,Retour à un trafic régulier sur l'ensemble de ...,,https://twitter.com/Ligne9_RATP/status/9476451...,2018-01-01
2,2,start,@Ligne9_RATP,2018-01-01 02:56:08+00:00,"02:53, la rame stationne à Nation en dir. de M...",,https://twitter.com/Ligne9_RATP/status/9476475...,2018-01-01
3,3,start,@Ligne9_RATP,2018-01-01 19:14:08+00:00,"19:11, la rame stationne à Cx Chavaux en dir. ...",,https://twitter.com/Ligne9_RATP/status/9478937...,2018-01-01
4,4,start,@Ligne9_RATP,2018-01-02 12:06:09+00:00,"12:02, le trafic est interrompu entre Rue des ...",,https://twitter.com/Ligne9_RATP/status/9481484...,2018-01-02


### Darta saving

In [None]:
engine = create_engine('postgresql://postgres:postgres@localhost:5432/Incidents_RATP')
incidents.to_sql('incidents', engine, if_exists='replace', index=False)