# Exploratory Data Analysis (EDA)

In this notebook, we will perform exploratory data analysis on the Tunisian stock market data to gain insights and identify patterns.


In [1]:
# Importing necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go

In [2]:
# Load the cleaned weekly stock market dataset from the project's data folder.
# The `date` column is not parsed at load time; later cells convert it with pd.to_datetime.
csv_path = './data/cleaned_weekly_stock_market.csv'
data = pd.read_csv(csv_path)

# Preview the first five rows as a quick sanity check of the load
data.head(5)

Unnamed: 0,companyName,date,openingPrice,highestPrice,lowestPrice,closingPrice,volume
0,SOTUMAG,2014-06-16,1.76,1.8,1.73,1.73,44315.0
1,SOTUMAG,2014-06-23,1.78,1.82,1.77,1.82,9551.0
2,SOTUMAG,2014-06-30,1.81,1.82,1.81,1.81,2401.0
3,SOTUMAG,2014-07-07,1.78,1.82,1.78,1.8,10341.0
4,SOTUMAG,2014-07-14,1.83,1.83,1.76,1.81,27480.0


## Summary Statistics

We will start by generating summary statistics for the dataset to understand its basic properties.


In [3]:
# Summary statistics
# describe() reports count, mean, std, min, quartiles and max for each numeric column;
# the result is kept in `summary_stats` and shown as the cell output.
summary_stats = data.describe()
summary_stats


Unnamed: 0,openingPrice,highestPrice,lowestPrice,closingPrice,volume
count,38631.0,38631.0,38631.0,38631.0,38631.0
mean,106.641003,107.559123,105.715594,106.777993,282125.0
std,791.642044,797.612176,785.73526,792.626816,2504922.0
min,0.11,0.11,0.11,0.11,0.0
25%,2.93,2.99,2.85,2.93,1610.0
50%,6.69,6.8,6.53,6.7,10039.0
75%,14.705,14.91,14.47,14.71,39017.5
max,9592.97,9670.22,9578.67,9663.86,112422300.0


In [4]:
# Dimensions of the loaded dataset as (number of rows, number of columns)
data.shape

(38631, 7)

## Companies Stock Prices

We will visualize the stock prices of the companies in the dataset to identify patterns and trends.

In [5]:
# Distinct company names present in the dataset, in order of first appearance
data["companyName"].unique()

array(['SOTUMAG', 'TUNISAIR', 'STEQ', 'BANQUE ATTIJARI DE TUNIS',
       'HANNIBAL LEASE', 'AMI', 'CELLCOM', 'SIPHAT', 'TUNISIE VALEURS',
       'POULINA GROUP HOLDING', 'PLACEMENTS DE TUNISIE - SICAF', 'SIAME',
       'ONE TECH', 'TUNISIE LEASING & FACTORING', 'SOPAT',
       'ASSURANCES MAGHREBIA', 'WIFAK INT BANK', 'MAGASIN GENERAL',
       'SERVICOM', 'MIP', 'SOTUVER', 'UNIMED', 'SOTIPAPIER',
       'MEUBLES INTERIEURS', 'ESSOUKNA', 'STB BANK', 'UIB',
       'MAGHREBIA VIE', 'AMEN BANK', 'BH ASSURANCE',
       'SOCIETE CHIMIQUE ALKIMIA', 'BH BANK', 'CEREALIS', 'SOTRAPIL',
       'ATTIJARI LEASING', 'CITY CARS', 'MONOPRIX', 'CARTHAGE CEMENT',
       'ICF', 'BANQUE NATIONALE AGRICOLE', 'TPR', 'ELBENE', 'SOTEMAIL',
       'BH LEASING', 'ARAB TUNISIAN BANK', 'DELICE HOLDING', 'SOTETEL',
       'ARTES', 'SANIMED', 'TUNISIAN BOND INDEX', 'UADH', 'TAWASOL',
       'SMART TUNISIE', 'ASSAD', 'SYPHAX AIRLINES', 'TELNET HOLDING',
       'TUNINVEST - SICAR', 'SFBT', 'GIF FILTER', 'UBCI', "LAND

In [8]:
# Restrict the dataset to companies that have a record on the reference week.
# NOTE(review): presumably this keeps only companies that are still trading at the
# end of the sample -- confirm that 2024-06-10 is the intended cutoff.
REFERENCE_DATE = pd.Timestamp('2024-06-10')

# Convert to datetime so the comparison below is datetime-to-datetime.
# Re-running this cell is safe: converting an already-converted column is a no-op.
data['date'] = pd.to_datetime(data['date'])

# Rows recorded exactly on the reference date
data_on_specific_date = data[data['date'] == REFERENCE_DATE]

# Names of the companies that appear on that date
companies_with_data_on_specific_date = data_on_specific_date['companyName'].unique()

# Full history, limited to those companies
filtered_data = data[data['companyName'].isin(companies_with_data_on_specific_date)]

print(filtered_data['companyName'].unique())

# Leave the frame as the last expression so Jupyter renders it as a (truncated)
# rich table instead of a plain-text print() dump.
filtered_data

      companyName       date  openingPrice  highestPrice  lowestPrice  \
0         SOTUMAG 2014-06-16          1.76          1.80         1.73   
1         SOTUMAG 2014-06-23          1.78          1.82         1.77   
2         SOTUMAG 2014-06-30          1.81          1.82         1.81   
3         SOTUMAG 2014-07-07          1.78          1.82         1.78   
4         SOTUMAG 2014-07-14          1.83          1.83         1.76   
...           ...        ...           ...           ...          ...   
38626     SOMOCER 2024-05-13          0.52          0.52         0.46   
38627     SOMOCER 2024-05-20          0.50          0.56         0.48   
38628     SOMOCER 2024-05-27          0.56          0.57         0.50   
38629     SOMOCER 2024-06-03          0.53          0.55         0.49   
38630     SOMOCER 2024-06-10          0.50          0.51         0.49   

       closingPrice    volume  
0              1.73   44315.0  
1              1.82    9551.0  
2              1.81    2401

### SOTUMAG Stock Prices

In [7]:
# Plot the weekly price series (lines, left axis) and traded volume (bars, right
# axis) for a single company.
COMPANY_NAME = 'SOTUMAG'

# Ensure all required columns are present; `companyName` is needed to isolate
# one company's rows from the full dataset.
required_columns = ['companyName', 'date', 'openingPrice', 'highestPrice', 'lowestPrice', 'closingPrice', 'volume']
missing = [column for column in required_columns if column not in data.columns]
if missing:
    raise ValueError(f"The DataFrame is missing the required columns: {missing}")

data['date'] = pd.to_datetime(data['date'])

# `data` holds many companies. Plotting it unfiltered would chain every company's
# series into each trace, so keep only the rows of the company named in the title.
company_data = data[data['companyName'] == COMPANY_NAME].sort_values('date')
if company_data.empty:
    raise ValueError(f"No rows found for company {COMPANY_NAME!r}")

fig = go.Figure()

# One line trace per price column, in the same order and with the same legend
# labels as before.
price_series = {
    'openingPrice': 'Opening Price',
    'highestPrice': 'Highest Price',
    'lowestPrice': 'Lowest Price',
    'closingPrice': 'Closing Price',
}
for column, label in price_series.items():
    fig.add_trace(go.Scatter(
        x=company_data['date'],
        y=company_data[column],
        mode='lines',
        name=label,
        hovertemplate='%{x|%Y-%m-%d} - %{y:.2f}<extra></extra>',
    ))

# Volume bars go on the secondary y-axis: volume and price differ by orders of
# magnitude and would flatten each other on a shared axis.
fig.add_trace(go.Bar(
    x=company_data['date'],
    y=company_data['volume'],
    name='Volume',
    marker_color='rgba(100, 200, 102, 0.5)',
    hovertemplate='%{x|%Y-%m-%d} - %{y}<extra></extra>',
    yaxis='y2',
))

fig.update_layout(
    title=f'Stock Market Data of {COMPANY_NAME}',
    xaxis_title='Date',
    hovermode='x unified',
    template='plotly_white',
    showlegend=True,
    # Primary axis: prices with two decimals
    yaxis=dict(
        title='Price',
        tickformat=".2f",
    ),
    # Secondary axis: volume, drawn over the price axis on the right-hand side
    yaxis2=dict(
        title='Volume',
        overlaying='y',
        side='right',
        showgrid=False,
    ),
)

fig.show()