In [79]:
import pandas as pd
import plotly.express as px
import requests
import time
import datetime

## 1. Data Profiling:

In [80]:
df = pd.read_csv("/content/Riyadh_Metro_Dataset_last - Sheet1-2.csv")

In [81]:
df.head()

Unnamed: 0,line,station name,neighborhood,streets covered,coordinates,interchange train,interchange com bus,interchange rapid bus,parking availability
0,Blue 11,SABB,Alaarid,King Fahad Rd,"24.8296182,46.616385",0,0,0,1
1,Blue 12,DR Suliman AlHabib,Alyasmin,King Fahad Rd,"24.8115662,46.6256973",0,0,0,0
2,Blue 13,KAFD,Alaqiq,King Fahad Rd,"24.7674375,46.6430625",1,1,0,0
3,Blue 14,Al Murooj,Almuruj,King Fahad Rd,"24.7548628,46.6542393",0,0,0,0
4,Blue 15,King Fahad District,King Fahd,King Fahad Rd,"24.7454666,46.6589932",0,0,0,0


In [82]:
df.shape

(94, 9)

In [83]:
df.columns

Index(['line', 'station name', 'neighborhood', 'streets covered',
       'coordinates', 'interchange train', 'interchange com bus',
       'interchange rapid bus', 'parking availability '],
      dtype='object')

### Data Quality Checks

#### 1. Reliability:

Evaluate the data's source and collection process to determine its trustworthiness.

In [84]:
# the data is collected by ourselves from https://www.rcrc.gov.sa/ar/projects/public-transport-project-riyadh and was checked with google maps

#### 2. Timeliness:

Ensure the data is up-to-date and reflective of the current situation or the period of interest for the analysis.

In [85]:
# The data is up-to-date

#### 3. Consistency:

Confirm that the data is consistent within the dataset and across multiple data sources. For example, the same data point should not have different values in different places.

In [86]:
# -

#### 4. Relevance:
Assess whether the data is appropriate and applicable for the intended analysis. Data that is not relevant can skew results and lead to incorrect conclusions.

In [87]:
# the data is relevant to our objective analysis and we don't need to delete any columns.

#### 5. Uniqueness:
Check for and remove duplicate records to prevent skewed analysis results.

In [88]:
df.duplicated().sum()

0

#### 6. Completeness:
Ensure that no critical data is missing. This might mean checking for null values or required fields that are empty.

In [89]:
df.isnull().sum()

Unnamed: 0,0
line,0
station name,0
neighborhood,0
streets covered,0
coordinates,0
interchange train,0
interchange com bus,0
interchange rapid bus,0
parking availability,0


#### 7. Check Accuracy:

Verify that the data is correct and precise. This could involve comparing data samples with known sources or using validation rules.
**The process includes:**
1. Validating the appropriateness of data types for the dataset.
2. Identifying outliers  using established validation  rule

In [90]:
df.dtypes

Unnamed: 0,0
line,object
station name,object
neighborhood,object
streets covered,object
coordinates,object
interchange train,int64
interchange com bus,int64
interchange rapid bus,int64
parking availability,int64


In [91]:
df.head()

Unnamed: 0,line,station name,neighborhood,streets covered,coordinates,interchange train,interchange com bus,interchange rapid bus,parking availability
0,Blue 11,SABB,Alaarid,King Fahad Rd,"24.8296182,46.616385",0,0,0,1
1,Blue 12,DR Suliman AlHabib,Alyasmin,King Fahad Rd,"24.8115662,46.6256973",0,0,0,0
2,Blue 13,KAFD,Alaqiq,King Fahad Rd,"24.7674375,46.6430625",1,1,0,0
3,Blue 14,Al Murooj,Almuruj,King Fahad Rd,"24.7548628,46.6542393",0,0,0,0
4,Blue 15,King Fahad District,King Fahd,King Fahad Rd,"24.7454666,46.6589932",0,0,0,0


In [92]:
df[['latitude', 'longitude']] = df['coordinates'].str.split(",", expand=True)

In [93]:
df.head()

Unnamed: 0,line,station name,neighborhood,streets covered,coordinates,interchange train,interchange com bus,interchange rapid bus,parking availability,latitude,longitude
0,Blue 11,SABB,Alaarid,King Fahad Rd,"24.8296182,46.616385",0,0,0,1,24.8296182,46.616385
1,Blue 12,DR Suliman AlHabib,Alyasmin,King Fahad Rd,"24.8115662,46.6256973",0,0,0,0,24.8115662,46.6256973
2,Blue 13,KAFD,Alaqiq,King Fahad Rd,"24.7674375,46.6430625",1,1,0,0,24.7674375,46.6430625
3,Blue 14,Al Murooj,Almuruj,King Fahad Rd,"24.7548628,46.6542393",0,0,0,0,24.7548628,46.6542393
4,Blue 15,King Fahad District,King Fahd,King Fahad Rd,"24.7454666,46.6589932",0,0,0,0,24.7454666,46.6589932


In [94]:
df.drop(["coordinates"], axis= 1, inplace= True)

In [95]:
df.head()

Unnamed: 0,line,station name,neighborhood,streets covered,interchange train,interchange com bus,interchange rapid bus,parking availability,latitude,longitude
0,Blue 11,SABB,Alaarid,King Fahad Rd,0,0,0,1,24.8296182,46.616385
1,Blue 12,DR Suliman AlHabib,Alyasmin,King Fahad Rd,0,0,0,0,24.8115662,46.6256973
2,Blue 13,KAFD,Alaqiq,King Fahad Rd,1,1,0,0,24.7674375,46.6430625
3,Blue 14,Al Murooj,Almuruj,King Fahad Rd,0,0,0,0,24.7548628,46.6542393
4,Blue 15,King Fahad District,King Fahd,King Fahad Rd,0,0,0,0,24.7454666,46.6589932


In [96]:
df['latitude'] = df['latitude'].astype(float)

In [97]:
df['longitude'] = df['longitude'].astype(float)

In [98]:
df.dtypes

Unnamed: 0,0
line,object
station name,object
neighborhood,object
streets covered,object
interchange train,int64
interchange com bus,int64
interchange rapid bus,int64
parking availability,int64
latitude,float64
longitude,float64


## 2. Data Analysis:
In this section we analyze the dataset to understand it further

In [99]:
df[(df['interchange train'] == 1)]

Unnamed: 0,line,station name,neighborhood,streets covered,interchange train,interchange com bus,interchange rapid bus,parking availability,latitude,longitude
2,Blue 13,KAFD,Alaqiq,King Fahad Rd,1,1,0,0,24.7674375,46.6430625
6,Blue 17,STC,Alwurud,King Fahad Rd,1,1,0,0,24.7266373,46.6670073
15,Blue 26,National Museum,Alfutah,King Fahad Rd,1,1,0,0,24.6459853,46.7146782
17,Blue 28,Qasr Al Hokm,Alqari,King Fahad Rd,1,1,0,1,24.6290566,46.7162845
29,Red 15,STC,Alwurud,King Abdullad Rd,1,1,0,0,24.7266373,46.6670073
32,Red 18,Ministry of Education,King Salman,King Abdullad Rd,1,0,0,0,24.7406772,46.6947625
36,Red 22,Al Hamra,Alhamra,King Abdullad Rd,1,0,0,1,24.7764653,46.7764096
40,Green 11,Ministry of Education,King Salman,Prince Nasir Ibn Farhan Al Saud Rd,1,0,0,0,24.7406772,46.6947625
51,Green 22,National Museum,Al Futah,King Abdulaziz Rd,1,1,0,0,24.6459853,46.7146782
61,Orange 20,Qasr Al Hokm,Alqari,Al madinah Al Munawwarh Rd,1,1,0,1,24.6290566,46.7162845


In [100]:
df[(df['interchange train'] == 1) & (df['interchange com bus'] == 1)]

Unnamed: 0,line,station name,neighborhood,streets covered,interchange train,interchange com bus,interchange rapid bus,parking availability,latitude,longitude
2,Blue 13,KAFD,Alaqiq,King Fahad Rd,1,1,0,0,24.7674375,46.6430625
6,Blue 17,STC,Alwurud,King Fahad Rd,1,1,0,0,24.7266373,46.6670073
15,Blue 26,National Museum,Alfutah,King Fahad Rd,1,1,0,0,24.6459853,46.7146782
17,Blue 28,Qasr Al Hokm,Alqari,King Fahad Rd,1,1,0,1,24.6290566,46.7162845
29,Red 15,STC,Alwurud,King Abdullad Rd,1,1,0,0,24.7266373,46.6670073
51,Green 22,National Museum,Al Futah,King Abdulaziz Rd,1,1,0,0,24.6459853,46.7146782
61,Orange 20,Qasr Al Hokm,Alqari,Al madinah Al Munawwarh Rd,1,1,0,1,24.6290566,46.7162845
74,Yellow 11,KAFD,Alaqiq,King Fahd Rd,1,1,0,0,24.7674375,46.6430625
75,Yellow 12,Al Rabi,Alrabi,Prince Mohammed Bin Salman Rd,1,1,1,1,24.786267,46.6601472
83,Purple 11,KAFD,Alaqiq,King Fahd Rd,1,1,0,0,24.7674375,46.6430625


In [101]:
df[(df['interchange train'] == 1) & (df['interchange com bus'] == 1) & (df['interchange rapid bus'] == 1)]

Unnamed: 0,line,station name,neighborhood,streets covered,interchange train,interchange com bus,interchange rapid bus,parking availability,latitude,longitude
75,Yellow 12,Al Rabi,Alrabi,Prince Mohammed Bin Salman Rd,1,1,1,1,24.786267,46.6601472
84,Purple 12,Al Rabi,Alrabi,Prince Mohammed Bin Salman Rd,1,1,1,1,24.786267,46.6601472


In [102]:
df[(df['interchange train'] == 1)& (df['interchange com bus'] == 1) & (df['interchange rapid bus'] == 1) & (df['parking availability '] == 1)]

Unnamed: 0,line,station name,neighborhood,streets covered,interchange train,interchange com bus,interchange rapid bus,parking availability,latitude,longitude
75,Yellow 12,Al Rabi,Alrabi,Prince Mohammed Bin Salman Rd,1,1,1,1,24.786267,46.6601472
84,Purple 12,Al Rabi,Alrabi,Prince Mohammed Bin Salman Rd,1,1,1,1,24.786267,46.6601472


In [103]:
df[(df['interchange train'] == 1) & (df['parking availability '] == 1)]

Unnamed: 0,line,station name,neighborhood,streets covered,interchange train,interchange com bus,interchange rapid bus,parking availability,latitude,longitude
17,Blue 28,Qasr Al Hokm,Alqari,King Fahad Rd,1,1,0,1,24.6290566,46.7162845
36,Red 22,Al Hamra,Alhamra,King Abdullad Rd,1,0,0,1,24.7764653,46.7764096
61,Orange 20,Qasr Al Hokm,Alqari,Al madinah Al Munawwarh Rd,1,1,0,1,24.6290566,46.7162845
71,Orange 30,An Naseem,Alnasim Algharbi,Prince Nayef Ibn Abdulaziz Rd,1,0,1,1,24.7012889,46.8298925
75,Yellow 12,Al Rabi,Alrabi,Prince Mohammed Bin Salman Rd,1,1,1,1,24.786267,46.6601472
84,Purple 12,Al Rabi,Alrabi,Prince Mohammed Bin Salman Rd,1,1,1,1,24.786267,46.6601472
89,Purple 17,Al Hamra\t,Alhamra,Eastern Ring Rd,1,0,0,1,24.7764653,46.7764096
93,Purple 21,An Naseem,Alnasim Algharbi,Abdul Rahman Ibn Awf Rd,1,0,1,1,24.7012889,46.8298925


In [104]:
df[df['streets covered'] == 'Airport Rd']

Unnamed: 0,line,station name,neighborhood,streets covered,interchange train,interchange com bus,interchange rapid bus,parking availability,latitude,longitude
78,Yellow 15,PNU,King Khalid Airport,Airport Rd,0,0,0,0,24.8548059,46.7114199
79,Yellow 16,Governmental Complex,King Khalid Airport,Airport Rd,0,0,0,1,24.8680636,46.6978301
80,Yellow 18,Airport T5,King Khalid Airport,Airport Rd,0,0,0,0,24.9405667,46.7104558
81,Yellow 19,Airport T3-4,King Khalid Airport,Airport Rd,0,0,0,0,24.9557809,46.7024155
82,Yellow 20,Airport T1-2,King Khalid Airport,Airport Rd,0,0,0,0,24.961072,46.6987871
87,Purple 15,Granada,Granada,Airport Rd,0,0,0,0,24.7864329,46.7291437


In [105]:
df[df['line'].str.contains('yellow', case=False, na=False)]

Unnamed: 0,line,station name,neighborhood,streets covered,interchange train,interchange com bus,interchange rapid bus,parking availability,latitude,longitude
74,Yellow 11,KAFD,Alaqiq,King Fahd Rd,1,1,0,0,24.7674375,46.6430625
75,Yellow 12,Al Rabi,Alrabi,Prince Mohammed Bin Salman Rd,1,1,1,1,24.786267,46.6601472
76,Yellow 13,Othman Bin Affan,Alnada,Prince Mohammed Bin Salman Rd,1,0,0,0,24.8013559,46.6958954
77,Yellow 14,SABIC,Alfalah,Prince Mohammed Ibn Salman Ibn Abdulaziz Rd,1,0,0,0,24.8071083,46.7100517
78,Yellow 15,PNU,King Khalid Airport,Airport Rd,0,0,0,0,24.8548059,46.7114199
79,Yellow 16,Governmental Complex,King Khalid Airport,Airport Rd,0,0,0,1,24.8680636,46.6978301
80,Yellow 18,Airport T5,King Khalid Airport,Airport Rd,0,0,0,0,24.9405667,46.7104558
81,Yellow 19,Airport T3-4,King Khalid Airport,Airport Rd,0,0,0,0,24.9557809,46.7024155
82,Yellow 20,Airport T1-2,King Khalid Airport,Airport Rd,0,0,0,0,24.961072,46.6987871


In [106]:
!pip install -U kaleido




In [107]:
df.columns

Index(['line', 'station name', 'neighborhood', 'streets covered',
       'interchange train', 'interchange com bus', 'interchange rapid bus',
       'parking availability ', 'latitude', 'longitude'],
      dtype='object')

## 3. Metro Map :
In this section, we reviewed the six paths on the map

In [108]:
pd.options.display.float_format = '{:.7f}'.format

df['line'] = df['line'].str.strip()


color_discrete_map = {
    'Blue': 'cornflowerblue',
    'Red': 'indianred',
    'Green': 'mediumseagreen',
    'Orange': 'orange',
    'Yellow': 'yellow',
    'Purple': 'mediumpurple'
}

df['line_color'] = df['line'].str.extract(r'(\w+)')

interchange_stations = df['station name'].value_counts()[df['station name'].value_counts() > 1].index

df['is_interchange'] = df['station name'].apply(lambda x: x in interchange_stations)

fig = px.scatter_mapbox(
    df,
    lat='latitude',
    lon='longitude',
    text='station name',
    color='line_color',
    zoom=10,
    mapbox_style='open-street-map',
    title="Riyadh Metro Stations Map",
    color_discrete_map=color_discrete_map,
    hover_data={
        'latitude': ':.7f',
        'longitude': ':.7f',
        'station name': True,
        'line_color': True
    }
)

fig.update_traces(marker=dict(size=10))

interchange_data = df[df['is_interchange'] == True]

interchange_fig = px.scatter_mapbox(
    interchange_data,
    lat='latitude',
    lon='longitude',
    text='station name',
    color_discrete_sequence=['black'], # indicate to interchange station
    zoom=10
)

interchange_fig.update_traces(marker=dict(size=15))

fig.add_trace(interchange_fig.data[0])

fig.update_layout(
    height=800,
    width=1000
)

fig.show()


In [109]:
import plotly.express as px
import pandas as pd

# Ensure the float format is as desired
pd.options.display.float_format = '{:.7f}'.format

# Clean the 'line' column by stripping any extra spaces
df['line'] = df['line'].str.strip()

# Define the color mapping for different metro lines
color_discrete_map = {
    'Blue': '#429CC6',
    'Red': '#E44646',
    'Green': '#3FB548',
    'Orange': '#F68D39',
    'Yellow': '#FFD831',
    'Purple': '#95489B'
}

# Extract the metro routes from the 'line' column
df['مسارات الميترو'] = df['line'].str.extract(r'(\w+)')

# Identify interchange stations (stations that appear more than once)
interchange_stations = df['station name'].value_counts()[df['station name'].value_counts() > 1].index

# Create a boolean column to flag interchange stations
df['is_interchange'] = df['station name'].apply(lambda x: x in interchange_stations)

# Create the main scatter mapbox plot
fig = px.scatter_mapbox(
    df,
    lat='latitude',
    lon='longitude',
    text='station name',
    color='line',  # Use the 'line' column for color
    zoom=10,
    mapbox_style='open-street-map',
    title="خريطة ميترو الرياض من تجميع عالمات البيانات",
    color_discrete_map=color_discrete_map,
    hover_data={
        'latitude': ':.7f',
        'longitude': ':.7f',
        'station name': True,
        'line': True
    }
)

# Update the marker size for the main map
fig.update_traces(marker=dict(size=10))

# Filter data for interchange stations and plot them
interchange_data = df[df['is_interchange'] == True]

interchange_fig = px.scatter_mapbox(
    interchange_data,
    lat='latitude',
    lon='longitude',
    text='station name',
    color_discrete_sequence=['black'],  # Use black for interchange stations
    zoom=10
)

# Update the marker size for interchange stations
interchange_fig.update_traces(marker=dict(size=15))

# Add the interchange trace to the main figure
fig.add_trace(interchange_fig.data[0])

# Update the layout of the plot
fig.update_layout(
    height=800,
    width=1000
)

# Show the final plot
fig.show()


In [110]:
import plotly.express as px
import pandas as pd

# Ensure the float format is as desired
pd.options.display.float_format = '{:.7f}'.format

# Clean the 'line' column by stripping any extra spaces
df['line'] = df['line'].str.strip()

# Define the color mapping for different metro lines
color_discrete_map = {
    'Blue': '#429CC6',
    'Red': '#E44646',
    'Green': '#3FB548',
    'Orange': '#F68D39',
    'Yellow': '#FFD831',
    'Purple': '#95489B'
}

# Extract the metro routes from the 'line' column
df['مسارات الميترو'] = df['line'].str.extract(r'(\w+)')

# Identify interchange stations (stations that appear more than once)
interchange_stations = df['station name'].value_counts()[df['station name'].value_counts() > 1].index

# Create a boolean column to flag interchange stations
df['is_interchange'] = df['station name'].apply(lambda x: x in interchange_stations)

# Set the order of the lines to ensure they appear grouped together in the legend
line_order = ['Blue', 'Red', 'Green', 'Orange', 'Yellow', 'Purple']
df['line'] = pd.Categorical(df['line'], categories=line_order, ordered=True)

# Create the main scatter mapbox plot
fig = px.scatter_mapbox(
    df,
    lat='latitude',
    lon='longitude',
    text='station name',
    color='line',  # Use the 'line' column for color
    zoom=10,
    mapbox_style='open-street-map',
    title="خريطة ميترو الرياض من تجميع عالمات البيانات",
    color_discrete_map=color_discrete_map,
    hover_data={
        'latitude': ':.7f',
        'longitude': ':.7f',
        'station name': True,
        'line': True
    }
)

# Update the marker size for the main map
fig.update_traces(marker=dict(size=10))

# Filter data for interchange stations and plot them
interchange_data = df[df['is_interchange'] == True]

interchange_fig = px.scatter_mapbox(
    interchange_data,
    lat='latitude',
    lon='longitude',
    text='station name',
    color_discrete_sequence=['black'],  # Use black for interchange stations
    zoom=10
)

# Update the marker size for interchange stations
interchange_fig.update_traces(marker=dict(size=15))

# Add the interchange trace to the main figure
fig.add_trace(interchange_fig.data[0])

# Update the layout of the plot
fig.update_layout(
    height=800,
    width=1000
)

# Show the final plot
fig.show()


In [111]:
import plotly.express as px
import pandas as pd

# Ensure the float format is as desired
pd.options.display.float_format = '{:.7f}'.format

# Clean the 'line' column by stripping any extra spaces
df['line'] = df['line'].str.strip()

# Define the color mapping for different metro lines
color_discrete_map = {
    'Blue': '#429CC6',  # Blue line color
    'Red': '#E44646',   # Red line color
    'Green': '#3FB548', # Green line color
    'Orange': '#F68D39',# Orange line color
    'Yellow': '#FFD831',# Yellow line color
    'Purple': '#95489B' # Purple line color
}

# Extract the metro routes from the 'line' column
df['مسارات الميترو'] = df['line'].str.extract(r'(\w+)')

# Identify interchange stations (stations that appear more than once)
interchange_stations = df['station name'].value_counts()[df['station name'].value_counts() > 1].index

# Create a boolean column to flag interchange stations
df['is_interchange'] = df['station name'].apply(lambda x: x in interchange_stations)

# Create the main scatter mapbox plot, with color based on the 'line' column
fig = px.scatter_mapbox(
    df,
    lat='latitude',
    lon='longitude',
    text='station name',
    color='line',  # Use the 'line' column for color
    zoom=10,
    mapbox_style='open-street-map',
    title="خريطة ميترو الرياض من تجميع عالمات البيانات",
    color_discrete_map=color_discrete_map,  # Apply the predefined color mapping
    hover_data={
        'latitude': ':.7f',
        'longitude': ':.7f',
        'station name': True,
        'line': True
    }
)

# Update the marker size for the main map
fig.update_traces(marker=dict(size=10))

# Filter data for interchange stations and plot them
interchange_data = df[df['is_interchange'] == True]

interchange_fig = px.scatter_mapbox(
    interchange_data,
    lat='latitude',
    lon='longitude',
    text='station name',
    color_discrete_sequence=['black'],  # Use black for interchange stations
    zoom=10
)

# Update the marker size for interchange stations
interchange_fig.update_traces(marker=dict(size=15))

# Add the interchange trace to the main figure
fig.add_trace(interchange_fig.data[0])

# Update the layout of the plot
fig.update_layout(
    height=800,
    width=1000
)

# Show the final plot
fig.show()


In [112]:
import plotly.express as px
import pandas as pd

# Ensure the float format is as desired
pd.options.display.float_format = '{:.7f}'.format

# Clean the 'line' column by stripping any extra spaces
df['line'] = df['line'].str.strip()

# Define the color mapping for different metro lines
color_discrete_map = {
    'Blue': '#429CC6',  # Blue line color
    'Red': '#E44646',   # Red line color
    'Green': '#3FB548', # Green line color
    'Orange': '#F68D39',# Orange line color
    'Yellow': '#FFD831',# Yellow line color
    'Purple': '#95489B' # Purple line color
}

# Extract the metro routes from the 'line' column
df['مسارات الميترو'] = df['line'].str.extract(r'(\w+)')

# Identify interchange stations (stations that appear more than once)
interchange_stations = df['station name'].value_counts()[df['station name'].value_counts() > 1].index

# Create a boolean column to flag interchange stations
df['is_interchange'] = df['station name'].apply(lambda x: x in interchange_stations)

# Create the main scatter mapbox plot, with color based on the 'line' column
fig = px.scatter_mapbox(
    df,
    lat='latitude',
    lon='longitude',
    text='station name',
    color='line',  # Use the 'line' column for color (correct column name)
    zoom=10,
    mapbox_style='open-street-map',
    title="خريطة ميترو الرياض من تجميع عالمات البيانات",
    color_discrete_map=color_discrete_map,  # Apply the predefined color mapping
    hover_data={
        'latitude': ':.7f',
        'longitude': ':.7f',
        'station name': True,
        'line': True  # Hover over to show the line column
    }
)

# Update the marker size for the main map
fig.update_traces(marker=dict(size=10))

# Filter data for interchange stations and plot them
interchange_data = df[df['is_interchange'] == True]

interchange_fig = px.scatter_mapbox(
    interchange_data,
    lat='latitude',
    lon='longitude',
    text='station name',
    color_discrete_sequence=['black'],  # Use black for interchange stations
    zoom=10
)

# Update the marker size for interchange stations
interchange_fig.update_traces(marker=dict(size=15))

# Add the interchange trace to the main figure
fig.add_trace(interchange_fig.data[0])

# Update the layout of the plot
fig.update_layout(
    height=800,
    width=1000
)

# Show the final plot
fig.show()


In [113]:
import plotly.express as px
import pandas as pd

# Ensure the float format is as desired
pd.options.display.float_format = '{:.7f}'.format

# Clean the 'line' column by stripping any extra spaces
df['line'] = df['line'].str.strip()

# Check the unique values in the 'line' column
print(df['line'].unique())

# Define the color mapping for different metro lines
color_discrete_map = {
    'Blue': '#429CC6',  # Blue line color
    'Red': '#E44646',   # Red line color
    'Green': '#3FB548', # Green line color
    'Orange': '#F68D39',# Orange line color
    'Yellow': '#FFD831',# Yellow line color
    'Purple': '#95489B' # Purple line color
}

# Ensure that the 'line' values match the keys in the color map
df['line'] = df['line'].replace({
    'Blue': 'Blue',
    'Red': 'Red',
    'Green': 'Green',
    'Orange': 'Orange',
    'Yellow': 'Yellow',
    'Purple': 'Purple'
})

# Identify interchange stations (stations that appear more than once)
interchange_stations = df['station name'].value_counts()[df['station name'].value_counts() > 1].index

# Create a boolean column to flag interchange stations
df['is_interchange'] = df['station name'].apply(lambda x: x in interchange_stations)

# Create the main scatter mapbox plot, with color based on the 'line' column
fig = px.scatter_mapbox(
    df,
    lat='latitude',
    lon='longitude',
    text='station name',
    color='line',  # Use the 'line' column for color
    zoom=10,
    mapbox_style='open-street-map',
    title="خريطة ميترو الرياض من تجميع عالمات البيانات",
    color_discrete_map=color_discrete_map,  # Apply the predefined color mapping
    hover_data={
        'latitude': ':.7f',
        'longitude': ':.7f',
        'station name': True,
        'line': True  # Hover over to show the line column
    }
)

# Update the marker size for the main map
fig.update_traces(marker=dict(size=10))

# Filter data for interchange stations and plot them
interchange_data = df[df['is_interchange'] == True]

interchange_fig = px.scatter_mapbox(
    interchange_data,
    lat='latitude',
    lon='longitude',
    text='station name',
    color_discrete_sequence=['black'],  # Use black for interchange stations
    zoom=10
)

# Update the marker size for interchange stations
interchange_fig.update_traces(marker=dict(size=15))

# Add the interchange trace to the main figure
fig.add_trace(interchange_fig.data[0])

# Update the layout of the plot
fig.update_layout(
    height=800,
    width=1000
)

# Show the final plot
fig.show()


[nan]


In [114]:
import plotly.express as px
import pandas as pd

# Ensure the float format is as desired
pd.options.display.float_format = '{:.7f}'.format

# Clean the 'line' column by stripping any extra spaces
df['line'] = df['line'].str.strip()

# Define the color mapping for different metro lines
color_discrete_map = {
    'Blue': '#429CC6',  # Blue line color
    'Red': '#E44646',   # Red line color
    'Green': '#3FB548', # Green line color
    'Orange': '#F68D39',# Orange line color
    'Yellow': '#FFD831',# Yellow line color
    'Purple': '#95489B' # Purple line color
}

# Ensure the 'line' values match the keys in the color map
df['line'] = df['line'].replace({
    'Blue': 'Blue',
    'Red': 'Red',
    'Green': 'Green',
    'Orange': 'Orange',
    'Yellow': 'Yellow',
    'Purple': 'Purple'
})

# Identify interchange stations (stations that appear more than once)
interchange_stations = df['station name'].value_counts()[df['station name'].value_counts() > 1].index

# Create a boolean column to flag interchange stations
df['is_interchange'] = df['station name'].apply(lambda x: x in interchange_stations)

# Create the main scatter mapbox plot, with color based on the 'line' column
fig = px.scatter_mapbox(
    df[df['is_interchange'] == False],  # Only non-interchange stations
    lat='latitude',
    lon='longitude',
    text='station name',
    color='line',  # Use the 'line' column for color
    zoom=10,
    mapbox_style='open-street-map',
    title="خريطة ميترو الرياض من تجميع عالمات البيانات",
    color_discrete_map=color_discrete_map,  # Apply the predefined color mapping
    hover_data={
        'latitude': ':.7f',
        'longitude': ':.7f',
        'station name': True,
        'line': True  # Hover over to show the line column
    }
)

# Update the marker size for the main map
fig.update_traces(marker=dict(size=10))

# Filter data for interchange stations and plot them
interchange_data = df[df['is_interchange'] == True]

interchange_fig = px.scatter_mapbox(
    interchange_data,
    lat='latitude',
    lon='longitude',
    text='station name',
    color_discrete_sequence=['black'],  # Use black for interchange stations
    zoom=10
)

# Update the marker size for interchange stations
interchange_fig.update_traces(marker=dict(size=15))

# Add the interchange trace to the main figure
fig.add_trace(interchange_fig.data[0])

# Update the layout of the plot
fig.update_layout(
    height=800,
    width=1000
)

# Show the final plot
fig.show()


AttributeError: Can only use .str accessor with string values!

In [None]:
import plotly.express as px
import pandas as pd

# Ensure the float format is as desired
pd.options.display.float_format = '{:.7f}'.format

# Convert the 'line' column to string and handle NaNs
df['line'] = df['line'].astype(str).str.strip()

# Define the color mapping for different metro lines
color_discrete_map = {
    'Blue': '#429CC6',  # Blue line color
    'Red': '#E44646',   # Red line color
    'Green': '#3FB548', # Green line color
    'Orange': '#F68D39',# Orange line color
    'Yellow': '#FFD831',# Yellow line color
    'Purple': '#95489B' # Purple line color
}

# Ensure the 'line' values match the keys in the color map
df['line'] = df['line'].replace({
    'Blue': 'Blue',
    'Red': 'Red',
    'Green': 'Green',
    'Orange': 'Orange',
    'Yellow': 'Yellow',
    'Purple': 'Purple'
})

# Identify interchange stations (stations that appear more than once)
interchange_stations = df['station name'].value_counts()[df['station name'].value_counts() > 1].index

# Create a boolean column to flag interchange stations
df['is_interchange'] = df['station name'].apply(lambda x: x in interchange_stations)

# Create the main scatter mapbox plot, with color based on the 'line' column
fig = px.scatter_mapbox(
    df[df['is_interchange'] == False],  # Only non-interchange stations
    lat='latitude',
    lon='longitude',
    text='station name',
    color='line',  # Use the 'line' column for color
    zoom=10,
    mapbox_style='open-street-map',
    title="خريطة ميترو الرياض من تجميع عالمات البيانات",
    color_discrete_map=color_discrete_map,  # Apply the predefined color mapping
    hover_data={
        'latitude': ':.7f',
        'longitude': ':.7f',
        'station name': True,
        'line': True  # Hover over to show the line column
    }
)

# Update the marker size for the main map
fig.update_traces(marker=dict(size=10))

# Filter data for interchange stations and plot them
interchange_data = df[df['is_interchange'] == True]

interchange_fig = px.scatter_mapbox(
    interchange_data,
    lat='latitude',
    lon='longitude',
    text='station name',
    color_discrete_sequence=['black'],  # Use black for interchange stations
    zoom=10
)

# Update the marker size for interchange stations
interchange_fig.update_traces(marker=dict(size=15))

# Add the interchange trace to the main figure
fig.add_trace(interchange_fig.data[0])

# Update the layout of the plot
fig.update_layout(
    height=800,
    width=1000
)

# Show the final plot
fig.show()


In [None]:
df['line'].unique()