In [50]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans
from sklearn import preprocessing
import xlrd
from pca import pca
import plotly.express as px
from IPython.display import display

# Load data from an Excel file
file = pd.read_excel("dataset2.xlsx", sheet_name=0, header=0)

# Extract and preprocess the data
df = file.iloc[2:22, [0] + list(range(4, 17))]
df.columns = df.iloc[0]  # Set column names based on the first row of the data
df = df.iloc[1:].reset_index(drop=True)  # Remove the first row and unwanted columns
df.set_index(df.columns[0], inplace=True)  # Set row names based on the first column of the data
df = df.apply(pd.to_numeric, errors='ignore')  # Convert data types to numeric

# Define the desired order of columns in a list
desired_column_order = ['Janv', 'Fév', 'Mars', 'Avril', 'Mai', 'Juin', 'Juillet', 'Août', 'Sept', 'Oct', 'Nov', 'Dec']

# Reorder the columns of the DataFrame 'df' based on the desired order
df = df[desired_column_order]

df.columns.name = "Délégation"  # Set the name for the columns of the DataFrame
df = df.rename_axis("")  # Remove the row index name to make it empty

# Display the resulting DataFrame
display(df)


Délégation,Janv,Fév,Mars,Avril,Mai,Juin,Juillet,Août,Sept,Oct,Nov,Dec
,,,,,,,,,,,,
"(Délégation Agadir, مندوبية اكادير)",2063.2318,2049.1711,3748.4683,3476.5231,2967.0594,3580.238,1829.4766,5543.4701,3145.7519,2331.2041,2572.9792,2793.836
"(Délégation Sidi ifni, مندوبية سيدي إفني )",4567.3875,4498.6455,10998.9095,8573.6835,5961.0586,7408.4446,945.7732,990.0075,1526.6024,1790.7085,889.3,399.459
"(Délégation Tantan, مندوبية طان طان)",2419.4675,2399.887,19316.372,5850.1835,1756.706,16436.374,5819.4285,7779.717,14135.7115,9484.18518,19384.4535,18355.659
"(Délégation Casa, مندوبية الدار البيضاء)",1739.67488,1119.56725,1746.97104,1119.32676,1189.0874,3573.79968,2488.2361,3073.9389,2113.05588,607.4283,1545.74754,1631.22228
"(Délégation El Jadida, مندوبية الجديدة)",4099.4425,3232.061,1194.1785,942.859,2347.4795,1818.531,1589.7595,2993.2915,4984.24,3938.5225,2787.403,2324.2795
"(Délégation Essaouira, مندوبية الصويرة)",408.09606,325.23267,1200.24037,515.19456,793.199,1441.77476,511.5761,905.21848,1148.08638,1949.3021,2647.13757,2478.22731
"(Délégation Mehdia, مندوبية مهدية)",1251.95,395.098,721.79,573.419,1645.341,1242.585,1157.804,1977.091,1074.239,440.059,681.205,986.162
"(Délégation Mohamedia, مندوبية المحمدية)",353.1315,141.1625,132.358,60.0035,23.7425,470.6225,264.0655,100.9975,169.2105,384.335,483.6775,396.8995
"(Délégation Rabat, مندوبية الرباط)",39.3475,17.3585,20.242,21.4131,58.758,404.995,48.4365,43.5635,0.776,0.733,0.75,59.2095


In [51]:
# Standardize the data
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)

# Calculate eigenvalues and eigenvectors
cov_matrix = np.cov(df_scaled.T)
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Calculate explained variance ratio
explained_variance_ratio = eigenvalues / np.sum(eigenvalues)

# Perform PCA using scikit-learn
pca = PCA()
principal_components = pca.fit_transform(df_scaled)

# Get the explained variance ratio
explained_variance_ratio = pca.explained_variance_ratio_

# Create a DataFrame for the Scree plot
scree_data = pd.DataFrame({
    'Principal Component': np.arange(1, len(explained_variance_ratio) + 1),
    'Explained Variance': explained_variance_ratio
})

# Create the Scree plot using Plotly
fig = px.bar(scree_data, x='Principal Component', y='Explained Variance',
             title='Scree Plot PCA', text='Explained Variance',
             labels={'Explained Variance': 'Explained Variance (%)'})

# Format the percentage labels
fig.update_traces(texttemplate='%{text:.2%}', textposition='outside')

# Show the plot
fig.show()


In [52]:

# Créez un DataFrame pour afficher le rapport de variance expliquée
explained_variance_df = pd.DataFrame({'eigenvalue': eigenvalues,
                                      'variance.percent': explained_variance_ratio * 100,
                                      'cumulative.variance.percent': explained_variance_ratio.cumsum() * 100})

# Renommez les index pour correspondre à la sortie souhaitée
explained_variance_df.index = ['Dim.' + str(i) for i in range(1, len(explained_variance_ratio) + 1)]

# Afficher le DataFrame
print(explained_variance_df)

        eigenvalue  variance.percent  cumulative.variance.percent
Dim.1    11.929993         94.184157                    94.184157
Dim.2     0.474624          3.747031                    97.931188
Dim.3     0.177697          1.402870                    99.334058
Dim.4     0.057246          0.451940                    99.785998
Dim.5     0.012331          0.097349                    99.883347
Dim.6     0.006597          0.052082                    99.935429
Dim.7     0.003790          0.029920                    99.965349
Dim.8     0.003197          0.025242                    99.990591
Dim.9     0.000799          0.006309                    99.996900
Dim.10    0.000014          0.001612                    99.998512
Dim.11    0.000204          0.001376                    99.999888
Dim.12    0.000174          0.000112                   100.000000
