In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from scipy import stats
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the revenue table; 'Revenue.csv' is a relative path, so it is resolved
# against the kernel's current working directory.
data =  pd.read_csv('Revenue.csv')

In [3]:
# Show the freshly loaded frame as this cell's output.
data

Unnamed: 0,Year,Q1,Q2,Q3,Q4
0,2024,9.43,8.56,0.0,0.0
1,2023,8.7,8.72,9.17,9.37
2,2022,8.1,7.6,8.2,8.4
3,2021,6.7,6.7,7.5,8.1
4,2020,7.1,6.0,4.2,6.2
5,2019,6.6,6.31,6.82,6.75
6,2018,6.07,6.03,6.31,6.3
7,2017,5.73,5.29,5.66,5.7
8,2016,5.37,4.99,5.24,5.71
9,2015,4.8,4.56,4.88,4.91


In [4]:
# Print column dtypes, non-null counts and memory usage of the loaded frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Year    16 non-null     int64  
 1   Q1      16 non-null     float64
 2   Q2      16 non-null     float64
 3   Q3      16 non-null     float64
 4   Q4      16 non-null     float64
dtypes: float64(4), int64(1)
memory usage: 772.0 bytes


In [5]:
# Remove incomplete years. The 2024 row carries 0.0 for Q3 and Q4
# (presumably quarters that were not reported yet), which would distort every
# total and statistic computed from the quarter columns.
# Filtering on the values instead of dropping positional row 0 makes this cell
# idempotent: re-running it can never remove a complete year.
_has_all_quarters = (data[['Q1', 'Q2', 'Q3', 'Q4']] > 0).all(axis=1)
data = data[_has_all_quarters].reset_index(drop=True)

In [6]:
# Show the frame again to check the result of the row removal.
data

Unnamed: 0,Year,Q1,Q2,Q3,Q4
0,2023,8.7,8.72,9.17,9.37
1,2022,8.1,7.6,8.2,8.4
2,2021,6.7,6.7,7.5,8.1
3,2020,7.1,6.0,4.2,6.2
4,2019,6.6,6.31,6.82,6.75
5,2018,6.07,6.03,6.31,6.3
6,2017,5.73,5.29,5.66,5.7
7,2016,5.37,4.99,5.24,5.71
8,2015,4.8,4.56,4.88,4.91
9,2014,4.24,3.87,4.15,4.18


In [7]:
# Average of the Q2 column over all rows currently held in `data`.
data['Q2'].mean()

np.float64(4.965333333333334)

In [8]:
import plotly.graph_objects as go

# Stacked bar chart: one bar per year, one segment per quarter, and the yearly
# total printed above each stack.
quarter_cols = ['Q1', 'Q2', 'Q3', 'Q4']
total_label_offset = 1.5  # gap, in y-axis units, between the stack top and its label

# Yearly total across the four quarters; kept as a column on `data`.
data['Total Revenue'] = data[quarter_cols].sum(axis=1)

fig = go.Figure()

# One bar trace per quarter; barmode='stack' (set below) piles them up per year.
for quarter in quarter_cols:
    fig.add_trace(go.Bar(
        x=data['Year'],
        y=data[quarter],
        name=quarter,           # legend entry
        text=data[quarter],     # value printed on the segment
        textposition='inside',
        hoverinfo='text'        # hover shows only the text value
    ))

# Label every stack with its total. zip() over the two columns replaces
# iterrows(), which builds a Series per row and upcasts the integer Year to
# float along the way.
for year, total in zip(data['Year'], data['Total Revenue']):
    fig.add_annotation(
        x=year,
        y=total + total_label_offset,
        text=f"${total:.2f}",
        showarrow=False,
        font=dict(size=12)
    )

fig.update_layout(
    title='Total Revenue by Quarter Over Years',
    barmode='stack',
    xaxis_title='Year',
    yaxis_title='Revenue (in billion US Dollars)',
    height=600,
    width=800,
    showlegend=True
)

# Save the plot as a high-quality PNG file
# fig.write_image("./Plots/revenue_plot.png", width=1200, height=800, scale=3)

fig.show()

In [9]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Normalise Year to a plain integer year. Going through to_datetime lets this
# cell accept Year either as an integer or as a datetime column.
data['Year'] = pd.to_datetime(data['Year'], format='%Y').dt.year

# Per-year statistics over the four quarters. Columns are selected by name so
# the result does not depend on the frame's column order.
quarterly_values = data[['Q1', 'Q2', 'Q3', 'Q4']]
data['Mean'] = quarterly_values.mean(axis=1)
data['Median'] = quarterly_values.median(axis=1)
data['StdDev'] = quarterly_values.std(axis=1)
data['Variance'] = quarterly_values.var(axis=1)

# One entry per panel: (column, panel/trace title, grid row, grid col, arrow ay).
panels = [
    ('Mean', 'Mean', 1, 1, -60),
    ('Median', 'Median', 1, 2, -60),
    ('StdDev', 'Standard Deviation', 2, 1, -40),
    ('Variance', 'Variance', 2, 2, -40),
]

# Row label that receives an arrow callout in every panel (2020 in the
# displayed data). NOTE(review): hard-coded position - confirm this is the
# year that should be highlighted if the input file changes.
highlight_label = 3

fig = make_subplots(rows=2, cols=2, subplot_titles=[title for _, title, _, _, _ in panels])

for column, title, grid_row, grid_col, arrow_dy in panels:
    fig.add_trace(
        go.Scatter(x=data['Year'], y=data[column], mode='lines+markers', name=title),
        row=grid_row, col=grid_col
    )
    highlighted_value = data.loc[highlight_label, column]
    fig.add_annotation(
        x=data.loc[highlight_label, 'Year'],
        y=highlighted_value,
        text=f'{column}: {highlighted_value:.2f}',
        showarrow=True,
        arrowhead=2,
        ax=0,
        ay=arrow_dy,  # vertical offset of the arrow tail, in pixels
        row=grid_row, col=grid_col
    )

fig.update_layout(
    title='Revenue Statistics Over Time',
    height=800,
    width=1000,
    showlegend=False
)
# Same axis titles on all four panels.
fig.update_xaxes(title_text='Year')
fig.update_yaxes(title_text='Revenue')


# Save the plot as a high-quality PNG file
# fig.write_image("./Plots/revenue_Stats_plot.png", width=1200, height=800, scale=3)

fig.show()


In [10]:
# Re-inspect dtypes and columns now that derived columns have been added to `data`.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Year           15 non-null     int32  
 1   Q1             15 non-null     float64
 2   Q2             15 non-null     float64
 3   Q3             15 non-null     float64
 4   Q4             15 non-null     float64
 5   Total Revenue  15 non-null     float64
 6   Mean           15 non-null     float64
 7   Median         15 non-null     float64
 8   StdDev         15 non-null     float64
 9   Variance       15 non-null     float64
dtypes: float64(9), int32(1)
memory usage: 1.2 KB


In [11]:
# Box plot of every quarter's revenue across all years, with arrow callouts
# for the median and both quartiles of each box.
fig = go.Figure()

quarters = ['Q1', 'Q2', 'Q3', 'Q4']
colors = ['blue', 'green', 'red', 'purple']
annotations = []

for position, (quarter, color) in enumerate(zip(quarters, colors)):
    revenue = data[quarter]
    lower_quartile = revenue.quantile(0.25)
    middle = revenue.median()
    upper_quartile = revenue.quantile(0.75)

    # Horizontal arrow-tail shift: +20 for the first three boxes, -40 for any later one.
    shift = -40 if position > 2 else 20

    fig.add_trace(go.Box(
        y=revenue,
        name=quarter,
        boxpoints='all',      # draw every observation next to the box
        jitter=0.3,
        pointpos=-1.8,
        marker_color=color,
        line_color=color,
        hovertext=data['Year'],
        hovertemplate='Year: %{hovertext}<br>Revenue: %{y}<extra></extra>'
    ))

    # (value, label, base ax, ay) for the three callouts, in the order
    # median -> first quartile -> third quartile.
    callouts = (
        (middle, 'Median', 0, -40),
        (lower_quartile, 'Quartile 1', -30, 20),
        (upper_quartile, 'Quartile 3', 30, -20),
    )
    for value, label, base_dx, dy in callouts:
        annotations.append(dict(
            x=position, y=value,
            text=f'{label}: {value:.2f}',
            showarrow=True,
            arrowhead=7,
            ax=base_dx + shift,
            ay=dy
        ))

fig.update_layout(
    title='Distribution of Quarterly Revenue Across All Years',
    yaxis_title='Revenue',
    xaxis_title='Quarter',
    showlegend=False,
    height=800,  # tall enough to leave room for the callouts
    width=1000,
    boxmode='group',
    annotations=annotations
)

# Save the plot as a high-quality PNG file
# fig.write_image("./Plots/revenue_distribution_plot.png", width=1200, height=800, scale=3)

fig.show()

In [12]:
# Show the frame including the derived total and per-year statistic columns.
data

Unnamed: 0,Year,Q1,Q2,Q3,Q4,Total Revenue,Mean,Median,StdDev,Variance
0,2023,8.7,8.72,9.17,9.37,35.96,8.99,8.945,0.333567,0.111267
1,2022,8.1,7.6,8.2,8.4,32.3,8.075,8.15,0.340343,0.115833
2,2021,6.7,6.7,7.5,8.1,29.0,7.25,7.1,0.680686,0.463333
3,2020,7.1,6.0,4.2,6.2,23.5,5.875,6.1,1.214839,1.475833
4,2019,6.6,6.31,6.82,6.75,26.48,6.62,6.675,0.226127,0.051133
5,2018,6.07,6.03,6.31,6.3,24.71,6.1775,6.185,0.148183,0.021958
6,2017,5.73,5.29,5.66,5.7,22.38,5.595,5.68,0.205345,0.042167
7,2016,5.37,4.99,5.24,5.71,21.31,5.3275,5.305,0.299819,0.089892
8,2015,4.8,4.56,4.88,4.91,19.15,4.7875,4.84,0.158614,0.025158
9,2014,4.24,3.87,4.15,4.18,16.44,4.11,4.165,0.164317,0.027


In [13]:
import pandas as pd
import plotly.express as px
from scipy.stats import kurtosis

# From here on Year is stored as a datetime (January 1st of each year).
data['Year'] = pd.to_datetime(data['Year'], format='%Y')


def _rounded_excess_kurtosis(values):
    """Return the Fisher (excess) kurtosis of `values`, rounded to 2 decimals."""
    return round(kurtosis(values, fisher=True), 2)


# Kurtosis of the four quarterly figures of each year (row-wise).
quarter_frame = data[['Q1', 'Q2', 'Q3', 'Q4']]
data['Kurtosis'] = quarter_frame.apply(_rounded_excess_kurtosis, axis=1)

# Bar chart of the yearly kurtosis, with the value printed on each bar.
fig = px.bar(
    data,
    x=data['Year'].dt.year,  # plot the calendar year rather than the timestamp
    y='Kurtosis',
    text='Kurtosis',
    title='Yearly Kurtosis Based on Quarterly Data',
    labels={'x': 'Year', 'Kurtosis': 'Kurtosis'},
    height=500,
    width=800
)

fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Kurtosis',
)

fig.show()

In [14]:
# Fisher (excess) kurtosis of each quarter's revenue across all years,
# rounded to 2 decimals and laid out as one row per quarter for plotting.
quarter_names = ['Q1', 'Q2', 'Q3', 'Q4']
quarterly_kurtosis = pd.DataFrame({
    'Quarter': quarter_names,
    'Kurtosis': [round(kurtosis(data[name], fisher=True), 2) for name in quarter_names],
})

# Bar chart with the kurtosis value printed on each bar.
fig = px.bar(
    quarterly_kurtosis,
    x='Quarter',
    y='Kurtosis',
    text='Kurtosis',
    title='Kurtosis for Each Quarter Across Years',
    labels={'Kurtosis': 'Kurtosis', 'Quarter': 'Quarter'},
    height=500,
    width=800
)

fig.update_layout(
    xaxis_title='Quarter',
    yaxis_title='Kurtosis'
)

fig.show()

In [15]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Pearson correlation between the columns at positions 1..5 of `data`
# (the four quarters plus 'Total Revenue' in the frame displayed earlier).
correlation_matrix = data.iloc[:, 1:6].corr(method='pearson')
axis_labels = correlation_matrix.columns
corr_values = correlation_matrix.values

# Heatmap on a fixed [-1, 1] colour range.
heatmap = go.Heatmap(
    z=corr_values,
    x=axis_labels,
    y=axis_labels,
    colorscale='Plasma',
    zmin=-1, zmax=1,
    colorbar=dict(title="Pearson Correlation")
)
fig = go.Figure(data=heatmap)

# Write each coefficient into its cell; the font colour switches at 0.5 so the
# text stays readable against the cell colour.
for row_pos, row_label in enumerate(correlation_matrix.index):
    for col_pos, col_label in enumerate(axis_labels):
        coefficient = corr_values[row_pos, col_pos]
        if coefficient < 0.5:
            text_color = 'white'
        else:
            text_color = 'black'
        fig.add_annotation(
            x=col_label,
            y=row_label,
            text=f'{coefficient:.2f}',
            showarrow=False,
            font=dict(color=text_color)
        )

fig.update_layout(
    title='Pearson Correlation Heatmap',
    height=600, width=800,
    xaxis=dict(tickangle=45)  # tilt the x tick labels
)

# Save the plot as a high-quality PNG file
# fig.write_image("./Plots/correlation_plot.png", width=1200, height=800, scale=3)

fig.show()

In [16]:
# Show the last rows of the frame (tail() returns five rows by default).
data.tail()

Unnamed: 0,Year,Q1,Q2,Q3,Q4,Total Revenue,Mean,Median,StdDev,Variance,Kurtosis
10,2013-01-01,3.79,3.56,3.74,3.8,14.89,3.7225,3.765,0.111467,0.012425,-0.83
11,2012-01-01,3.44,3.2,3.3,3.36,13.3,3.325,3.33,0.10116,0.010233,-1.21
12,2011-01-01,2.95,2.79,2.93,3.03,11.7,2.925,2.94,0.099833,0.009967,-0.97
13,2010-01-01,2.72,2.53,2.61,2.84,10.7,2.675,2.665,0.134784,0.018167,-1.38
14,2009-01-01,2.62,2.33,2.4,2.42,9.77,2.4425,2.41,0.124466,0.015492,-0.86


In [18]:
import statsmodels.api as sm

# Reshape to long format: one row per (Year, Quarter) pair with its revenue.
long_data = pd.melt(
    data,
    id_vars=['Year'],
    value_vars=['Q1', 'Q2', 'Q3', 'Q4'],
    var_name='Quarter',
    value_name='Quarter Revenue'
)

# The regression needs the calendar year as a plain integer. Depending on
# which cells ran before, Year is either an integer or a datetime column;
# to_datetime(...).dt.year normalises both, whereas a bare `.dt.year` raises
# AttributeError on an integer column.
long_data['Year'] = pd.to_datetime(long_data['Year'], format='%Y').dt.year

# Simple linear model: quarterly revenue explained by the year.
y = long_data['Quarter Revenue']
x = long_data['Year']

# OLS has no intercept unless a constant column is added explicitly.
X = sm.add_constant(x)

model = sm.OLS(y, X).fit()

# Fitted values, stored next to the observations for plotting.
long_data['Predicted Revenue'] = model.predict(X)

# Print the regression summary for inspection.
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:        Quarter Revenue   R-squared:                       0.934
Model:                            OLS   Adj. R-squared:                  0.933
Method:                 Least Squares   F-statistic:                     819.3
Date:                Fri, 14 Feb 2025   Prob (F-statistic):           6.63e-36
Time:                        13:18:48   Log-Likelihood:                -44.480
No. Observations:                  60   AIC:                             92.96
Df Residuals:                      58   BIC:                             97.15
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       -885.3928     31.115    -28.456      0.0

In [19]:
# Show the long-format frame together with the fitted 'Predicted Revenue' values.
long_data

Unnamed: 0,Year,Quarter,Quarter Revenue,Predicted Revenue
0,2023,Q1,8.7,8.285479
1,2022,Q1,8.1,7.84372
2,2021,Q1,6.7,7.401961
3,2020,Q1,7.1,6.960202
4,2019,Q1,6.6,6.518443
5,2018,Q1,6.07,6.076685
6,2017,Q1,5.73,5.634926
7,2016,Q1,5.37,5.193167
8,2015,Q1,4.8,4.751408
9,2014,Q1,4.24,4.309649


In [22]:
import plotly.graph_objects as go

# Scatter of observed quarterly revenue (x) against year (y), one marker
# trace per quarter, overlaid with the fitted regression line.
fig = go.Figure()

# groupby(sort=False) yields the quarters in order of first appearance and
# avoids building the same boolean mask twice per quarter.
for quarter, quarter_rows in long_data.groupby('Quarter', sort=False):
    fig.add_trace(go.Scatter(
        x=quarter_rows['Quarter Revenue'],
        y=quarter_rows['Year'],
        mode='markers',
        name=f'{quarter} Revenue',
        marker=dict(size=10)
    ))

# The prediction depends on Year only, so every quarter repeats the same
# fitted value. Keep one point per year, ordered by year, so the line is drawn
# once instead of being retraced for each quarter.
fit_line = (
    long_data[['Year', 'Predicted Revenue']]
    .drop_duplicates(subset='Year')
    .sort_values('Year')
)
fig.add_trace(go.Scatter(
    x=fit_line['Predicted Revenue'],
    y=fit_line['Year'],
    mode='lines',
    name='Best Fit Line',
    line=dict(color='red', width=2)
))

fig.update_layout(
    title='Quarterly Revenue with Best Fit Line',
    # The x-axis carries both the observed markers and the fitted line, so it
    # must not be labelled as predictions only.
    xaxis_title='Revenue (actual and predicted)',
    yaxis_title='Year',
    height=600,
    width=1000,
)

fig.show()


In [23]:
# Show the frame once more; by this point it also holds the 'Kurtosis' column
# and Year is rendered as a date.
data

Unnamed: 0,Year,Q1,Q2,Q3,Q4,Total Revenue,Mean,Median,StdDev,Variance,Kurtosis
0,2023-01-01,8.7,8.72,9.17,9.37,35.96,8.99,8.945,0.333567,0.111267,-1.77
1,2022-01-01,8.1,7.6,8.2,8.4,32.3,8.075,8.15,0.340343,0.115833,-0.94
2,2021-01-01,6.7,6.7,7.5,8.1,29.0,7.25,7.1,0.680686,0.463333,-1.53
3,2020-01-01,7.1,6.0,4.2,6.2,23.5,5.875,6.1,1.214839,1.475833,-0.93
4,2019-01-01,6.6,6.31,6.82,6.75,26.48,6.62,6.675,0.226127,0.051133,-1.11
5,2018-01-01,6.07,6.03,6.31,6.3,24.71,6.1775,6.185,0.148183,0.021958,-1.95
6,2017-01-01,5.73,5.29,5.66,5.7,22.38,5.595,5.68,0.205345,0.042167,-0.72
7,2016-01-01,5.37,4.99,5.24,5.71,21.31,5.3275,5.305,0.299819,0.089892,-1.11
8,2015-01-01,4.8,4.56,4.88,4.91,19.15,4.7875,4.84,0.158614,0.025158,-0.91
9,2014-01-01,4.24,3.87,4.15,4.18,16.44,4.11,4.165,0.164317,0.027,-0.79
