In [4]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, entropy

# Define the feature extraction function
def extract_features(decimal_values, window=5):
    """Compute rolling-window statistical features for a numeric sequence.

    For every position the statistics are taken over the trailing ``window``
    observations ending at that position.

    Parameters
    ----------
    decimal_values : pandas.Series or sequence of numbers
        Values to summarise. Anything that is not already a Series is
        wrapped in one (with a default integer index).
    window : int, default 5
        Number of trailing observations in each rolling window.

    Returns
    -------
    pandas.DataFrame
        One row per input value, indexed like the input, with the columns
        ``mean``, ``std``, ``skewness``, ``kurtosis`` and ``entropy``.
        Rows that do not yet have a full window (the first ``window - 1``
        rows) and any other undefined statistic are filled with 0, so an
        input shorter than ``window`` yields all-zero features.
    """
    if not isinstance(decimal_values, pd.Series):
        decimal_values = pd.Series(decimal_values)

    # Build the rolling view once and reuse it for every statistic.
    rolling = decimal_values.rolling(window=window)

    def _histogram_entropy(values):
        # Shannon entropy of the window's 10-bin histogram counts
        # (scipy normalises the counts to probabilities).
        return entropy(np.histogram(values, bins=10)[0])

    # raw=True hands plain ndarrays to the callbacks, avoiding the cost of
    # building a Series for every window; the computed values are the same.
    features = pd.DataFrame({
        'mean': rolling.mean(),
        'std': rolling.std(),
        'skewness': rolling.apply(skew, raw=True),
        'kurtosis': rolling.apply(kurtosis, raw=True),
        'entropy': rolling.apply(_histogram_entropy, raw=True),
    })

    # Replace the NaNs produced by incomplete windows / undefined statistics.
    return features.fillna(0)

# Read the ciphertext samples from disk
df = pd.read_csv(filepath_or_buffer='ciphertext.csv')

# Build the rolling-statistics feature matrix from the Decimal_Value column
X = extract_features(decimal_values=df.loc[:, 'Decimal_Value'])

# Continue with clustering, standardization, etc.
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

# Standardize features so each statistic is on a comparable scale for KMeans
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply KMeans clustering. n_init is pinned explicitly: leaving it unset
# emits a FutureWarning on scikit-learn versions where the default is changing.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)  # Assuming 3 possible algorithms
df['Cluster'] = kmeans.fit_predict(X_scaled)

# Analyzing the clusters: check if clusters have distinguishable features.
# numeric_only=True keeps this working if the CSV also has text columns.
print(df.groupby('Cluster').mean(numeric_only=True))

# Predicting the cluster (and thus the algorithm) for a new Decimal_Value
new_data = pd.DataFrame({'Decimal_Value': [105]})  # Replace with actual value

# A single value cannot fill a rolling window: every feature would be NaN and
# then zero-filled, so the prediction would not depend on the value at all.
# Instead, complete the window with the most recent historical values and use
# the feature row of the final (new) observation.
ROLLING_WINDOW = 5  # must match the window used inside extract_features
window_values = pd.concat(
    [df['Decimal_Value'].tail(ROLLING_WINDOW - 1), new_data['Decimal_Value']],
    ignore_index=True,
)
X_new = extract_features(window_values).tail(1)
X_new_scaled = scaler.transform(X_new)
predicted_cluster = kmeans.predict(X_new_scaled)

print(f"The new data likely belongs to Cluster: {predicted_cluster[0]}")



         Decimal_Value
Cluster               
0           158.609670
1           101.548793
2           132.496970
The new data likely belongs to Cluster: 1


  super()._check_params_vs_input(X, default_n_init=10)


In [5]:
# Group by clusters and inspect the per-cluster column means.
# numeric_only=True restricts the mean to numeric columns, so any text columns
# in the dataset cannot make the aggregation raise on recent pandas versions.
cluster_summary = df.groupby('Cluster').mean(numeric_only=True)  # Replace with relevant analysis
print(cluster_summary)

         Decimal_Value
Cluster               
0           158.609670
1           101.548793
2           132.496970


In [None]:
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Project the standardized features onto their first two principal components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
pc1, pc2 = X_pca[:, 0], X_pca[:, 1]

# Scatter the projected points, coloured by their assigned cluster
fig, ax = plt.subplots(figsize=(10, 7))
points = ax.scatter(pc1, pc2, c=df['Cluster'], cmap='viridis', marker='o', edgecolor='k', s=100)
ax.set_title("Clustering of Ciphertext Data")
ax.set_xlabel("Principal Component 1")
ax.set_ylabel("Principal Component 2")
fig.colorbar(points, ax=ax, label='Cluster')
ax.grid(True)
plt.show()
