# In [None]:
# -*- coding: utf-8 -*-
"""
Created on Sat Jul  6 15:00:00 2024

@author: 12345
"""
# Importing external libraries

import plotly.express as px
import plotly.offline as ply
import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn import preprocessing
import kaleido

# ---------------------------------------------------------------------------
# Parameters
# ---------------------------------------------------------------------------
# Hedging tenor (currently disabled): tenor_hedged = '30Y'

# Labels of the hedging instruments. Each label is made of two tenor parts;
# the parts are listed separately (upper-cased) in tools_X and tools_Y below,
# in the same order.
tools = [
    '1y10Y',
    '5y5Y',
    '10Y10y',
    '5y30Y',
]

# First tenor part of each entry in `tools`.
tools_X = [
    '1Y',
    '5Y',
    '10Y',
    '5Y',
]

# Second tenor part of each entry in `tools`.
tools_Y = [
    '10Y',
    '5Y',
    '10Y',
    '30Y',
]

# Cost parameter attached to the tools (units not stated here -- TODO confirm).
cost_tools = 1

# ---------------------------------------------------------------------------
# Import data
# ---------------------------------------------------------------------------

# Time series workbook (one row per date, indexed by 'Date' further below).
data = pd.read_excel('dv01.xlsx')

# Parameter tables: one sheet of 'Params.xlsx' each, first column as the index.
DV01_params, Vega_params, bps_Vega_params = (
    pd.read_excel('Params.xlsx', sheet_name=sheet, index_col=0)
    for sheet in ('DV01', 'Vega Initial', 'bps Vega')
)

data = data.set_index('Date')

# Working copy of the time series with the last row dropped.
df_estr = data.copy(deep=True)[:-1]

# Independent copy kept alongside the working frame.
combined_data = df_estr.copy(deep=True)

# Line chart of the swap-rate time series, one trace per column of df_estr.
_swap_axis_labels = {'value': 'Swap Rates (in %)', 'date': "Date"}
fig = px.line(
    df_estr,
    title='Swap Rates - BBG',
    labels=_swap_axis_labels,
    template="plotly_dark",
    width=1000,
    height=800,
)

# Display inline, then also render through plotly's offline plotter.
fig.show()
ply.plot(fig)

###############################################################################
# Daily changes in swap rates
#   1. work on a copy of the swap-rate frame
#   2. take the day-over-day difference, scaled by 100 (bps)
#   3. discard rows holding NaN or infinite values
###############################################################################

df = df_estr.copy(deep=True)

# Difference against the previous row, scaled by 100.
returns = df.sub(df.shift(1)).mul(100)

# Treat +/-inf as missing, then drop every row with a missing value.
returns = returns.replace([np.inf, -np.inf], np.nan).dropna(axis=0)

# Notebook-style display of the resulting frame.
returns

#%%

###############################################################################
# Covariance matrix and PCA of the daily changes
#   - cov_matrix : covariance of the daily changes (kept for reference and for
#                  its tenor index)
#   - pca        : PCA fitted on the daily changes themselves
#   - raw_bars   : % of variance explained by each principal component,
#                  used for the scree plot
###############################################################################

cov_matrix = returns.cov()

# Fit the PCA on the observations (rows = dates, columns = tenors).
# scikit-learn's PCA centres its input and derives the components from that
# data matrix, so it must be given the returns. Fitting it on `cov_matrix`
# would treat each row of the covariance matrix as an observation and yield
# loadings / explained-variance ratios that do not describe the returns.
pca = PCA()
pca.fit(returns)

# Explained variance, in percent, rounded to two decimals.
per_var = np.round(pca.explained_variance_ratio_ * 100, decimals=2)
labels = ['PC' + str(x) for x in range(1, len(per_var) + 1)]

# Small frame (index = PC label) to make the bar chart easy to plot.
raw_bars = pd.DataFrame(per_var, index=labels)

# Scree plot: explained variance of the first eight principal components.
_scree_labels = {
    "index": "Principal Component",
    "value": "Percentage of Explained Variance",
}
fig = px.bar(
    raw_bars.head(8),
    title='% of Explained Variance by PCs',
    labels=_scree_labels,
    template="plotly_dark",
    width=600,
    height=500,
)
fig.update_layout(showlegend=False)  # single series, legend adds nothing
fig.show()


# Loadings of the first three principal components, one row per column of the
# covariance matrix and one column per component.
rands = pd.DataFrame(
    {'PC{}'.format(k + 1): pca.components_[k] for k in range(3)},
    index=cov_matrix.index,
)

# Notebook-style display of the loadings.
rands


#%%
###############################################################################
# Principal components (1-3) across tenors
#   The loadings in `rands` are plotted against the tenor index so the shape
#   of each component along the curve can be interpreted.
#   Note: each principal component is a linear combination of the original
#   data and the loadings, so its value over the whole time series is a dot
#   product of the two.
###############################################################################

_loading_labels = {"value": "Change in Yield", "index": "Tenor"}
fig_pca = px.line(
    rands,
    title='PCs across Tenors',
    labels=_loading_labels,
    template="plotly_dark",
    markers=True,
    width=800,
    height=700,
)
fig_pca.show()

# Values of the principal components over time: project the daily changes
# onto the three loading vectors (plain matrix product, no label alignment).
tas = returns.copy(deep=True)
pcas = np.dot(tas.to_numpy(), rands.to_numpy())

# Wrap the projection in a frame sharing the date index of the daily changes.
pca_df = pd.DataFrame(
    pcas,
    index=tas.index,
    columns=['PC1', 'PC2', 'PC3'],
)

# Append the component columns to the daily changes for easy plotting.
tas = tas.join(pca_df)

# Notebook-style display of the component values.
pca_df

# Time series of the principal-component values: the last three columns of
# `tas` are the ones appended by the join above.
_pc_columns = tas.columns[-3:]
_pc_labels = {"value": "Values", "index": "Time"}
fig_pca1 = px.line(
    tas.loc[:, _pc_columns],
    title='Principal Components (Actual Values)',
    labels=_pc_labels,
    template='plotly_dark',
    width=800,
    height=700,
)

# Display inline, then also render through plotly's offline plotter.
fig_pca1.show()
ply.plot(fig_pca1)

#%%

###############################################################################
# Hedging ratio
# portfolio allocation in vega per bps term
###############################################################################

# One label per cell of the Vega grid (row label + column label), skipping the
# 'TOTAL' row and the 'SUM' column (presumably aggregates -- TODO confirm).
index_trans = [
    i + j
    for i in Vega_params.index
    for j in Vega_params.columns
    if i != 'TOTAL' and j != 'SUM'
]

# One row per grid cell. The transformed value is not computed yet, so the
# column is initialised with NaN to keep the script runnable.
Vega_initial_trans = pd.DataFrame(
    np.nan,
    index=index_trans,
    columns=['Vega Initial Trans'],
)

# TODO: fill 'Vega Initial Trans'. The original statement was left unfinished
# (`Vega_initial_trans.loc[i+j,'Vega Initial Trans'] = `) with the note
# "transform the initial value by Vega initial/IV/bps Vega"; the exact formula
# and the source of IV are not defined anywhere in this script.
#
# Outstanding steps noted by the author:
#   - calculate the total bps Vega in the portfolio
#   - exposure should also be based on it for the different tenors
#   - sum up the exposure for each factor
#   - cost cut from vol



#%%
# Synthetic volatility surface: uniform random values on a
# (time point, strike, maturity) grid, seeded for reproducibility.
np.random.seed(0)
strike_prices = np.array([90, 95, 100, 105, 110])  # strike prices
maturities = np.array([30, 60, 90, 180])           # maturities
vol_surface_data = np.random.rand(100, len(strike_prices), len(maturities))  # 100 time points

# Flatten each surface into one row: one row per time point, one column per
# (strike, maturity) combination.
data_flat = vol_surface_data.reshape(100, -1)

# Standardise the columns. StandardScaler is reached through the
# `preprocessing` module imported at the top of the file; the bare name
# `StandardScaler` was never imported and raised a NameError.
scaler = preprocessing.StandardScaler()
data_scaled = scaler.fit_transform(data_flat)

# Apply PCA, keeping the first three principal components.
pca = PCA(n_components=3)
principal_components = pca.fit_transform(data_scaled)

# Principal-component scores, one row per time point.
principal_df = pd.DataFrame(
    data=principal_components,
    columns=['PC1', 'PC2', 'PC3'],
)

# Share of variance explained by each retained component.
explained_variance_ratio = pca.explained_variance_ratio_
print("Explained variance ratio of each principal component:", explained_variance_ratio)

# Visualise the first two principal components as a scatter plot.
_fig, _ax = plt.subplots(figsize=(8, 6))
_ax.scatter(
    principal_df['PC1'],
    principal_df['PC2'],
    c='blue',
    edgecolors='k',
    s=50,
)
_ax.set_xlabel('Principal Component 1')
_ax.set_ylabel('Principal Component 2')
_ax.set_title('PCA on Volatility Surface')
_ax.grid(True)
plt.show()

# Loading matrix of the retained principal components.
loadings = pca.components_
print("Loadings:\n", loadings)

# Total share of variance explained by the retained components.
total_explained_variance = explained_variance_ratio.sum()
print("Total explained variance by selected principal components:", total_explained_variance)
