In [None]:
# Mount Google Drive at /content/drive; the dataset paths used by this
# notebook point inside that mount.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Single place to change when the dataset folder moves; both split files
# are resolved relative to it.
DATA_DIR = '/content/drive/MyDrive/TwoPatterns'
test_file = f'{DATA_DIR}/TwoPatterns_TEST.txt'
train_file = f'{DATA_DIR}/TwoPatterns_TRAIN.txt'

In [None]:
!pip install gudhi
#!pip install --upgrade gudhi


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install sklearn


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import gudhi as gd
import gudhi.persistence_graphical_tools as gpt
import gudhi.representations
#from gd.representations import Landscape



# Load the data
def load_data(file_path):
    """Read a header-less, whitespace-delimited table and split off the labels.

    Parameters
    ----------
    file_path : str or path-like
        Location of a text file readable by ``pandas.read_csv``. Every row
        is one sample: the first field is its label, the remaining fields
        are the values of the series.

    Returns
    -------
    time_series : numpy.ndarray
        All columns after the first, one row per sample.
    labels : numpy.ndarray
        The first column, one entry per sample.
    """
    # sep=r"\s+" is the supported spelling of the deprecated
    # delim_whitespace=True: any run of whitespace separates two fields.
    data = pd.read_csv(file_path, header=None, sep=r"\s+")
    labels = data.iloc[:, 0].to_numpy()
    time_series = data.iloc[:, 1:].to_numpy()
    return time_series, labels



# Read both splits; each call returns (series matrix, label vector).
X_train, y_train = load_data(train_file)
X_test, y_test = load_data(test_file)



def sliding_window(ts, m, tau):
    """Delay-embed a series: one row per window of ``m`` samples spaced ``tau`` apart.

    Row ``i`` of the result is ``ts[i], ts[i + tau], ..., ts[i + (m - 1) * tau]``.

    Parameters
    ----------
    ts : array-like
        The series; windows are taken along its first axis.
    m : int
        Number of samples per window (must be >= 1).
    tau : int
        Step between consecutive samples of a window (must be >= 1).

    Returns
    -------
    numpy.ndarray
        Array whose first two axes are ``(n_windows, m)`` with
        ``n_windows = max(len(ts) - (m - 1) * tau, 0)``. A series too short
        to hold a single window yields an empty array that still has ``m``
        columns, so downstream code always sees a consistent shape.

    Raises
    ------
    ValueError
        If ``m`` or ``tau`` is smaller than 1.
    """
    if m < 1 or tau < 1:
        raise ValueError("m and tau must be positive integers")

    series = np.asarray(ts)
    n_windows = max(len(series) - (m - 1) * tau, 0)

    # Broadcasting a column of window starts against a row of in-window
    # offsets gives the full (n_windows, m) index grid in one shot.
    starts = np.arange(n_windows)[:, None]
    offsets = tau * np.arange(m)[None, :]
    return series[starts + offsets]

# Delay-embedding parameters: every window holds m samples taken tau steps apart.
m = 5
tau = 1

# One point cloud per series; each row of a cloud is one window of the series.
point_clouds_train = np.array([sliding_window(ts, m, tau) for ts in X_train])
point_clouds_test = np.array([sliding_window(ts, m, tau) for ts in X_test])

# Compute persistence diagrams
def compute_persistence_diagrams(point_clouds, max_edge_length=1.0):
    """Run Rips persistence on every point cloud and collect the results.

    Parameters
    ----------
    point_clouds : iterable
        Point clouds, each passed as ``points`` to ``gd.RipsComplex``.
    max_edge_length : float, optional
        Forwarded to ``gd.RipsComplex`` as ``max_edge_length``.

    Returns
    -------
    list
        One entry per point cloud, in input order: whatever
        ``SimplexTree.persistence()`` returns for the Rips complex expanded
        up to dimension 2.
    """
    def _persistence_of(cloud):
        # Build the complex, expand it to 2-simplices, then reduce.
        rips = gd.RipsComplex(points=cloud, max_edge_length=max_edge_length)
        tree = rips.create_simplex_tree(max_dimension=2)
        return tree.persistence()

    return [_persistence_of(cloud) for cloud in point_clouds]

# Persistence of every point cloud, using the default max_edge_length of 1.0.
diagrams_train = compute_persistence_diagrams(point_clouds_train)
diagrams_test = compute_persistence_diagrams(point_clouds_test)












In [None]:
#rips_complex = gd.RipsComplex(points=pc, max_edge_length= 1)
#simplex_tree = rips_complex.create_simplex_tree(max_dimension=2)

In [None]:
# Inspect one training diagram; entries display as (dimension, (birth, death)).
diagrams_train[11]

[(1, (0.4187443882974823, 0.5594127950278094)),
 (1, (0.5341273413378421, 0.6443205131008599)),
 (1, (0.6215534713114521, 0.7287326161117949)),
 (1, (0.616721866152969, 0.716399684709996)),
 (1, (0.45100347726808065, 0.5327449006011206)),
 (1, (0.6397396423679697, 0.7022967216568635)),
 (1, (0.5270424703408226, 0.5865580709336439)),
 (1, (0.5943914428506804, 0.6471468513939386)),
 (1, (0.5200643530467146, 0.5713537075824692)),
 (1, (0.3494338421246547, 0.39319121877830593)),
 (1, (0.5449821313036414, 0.5776169676436144)),
 (1, (0.3831880273767622, 0.41525854566881704)),
 (1, (0.3047834821280636, 0.3358172516251857)),
 (1, (0.49627062627907825, 0.5268888543037145)),
 (1, (0.5394616342195782, 0.5673654128280231)),
 (1, (0.5697921954463241, 0.5971581592560058)),
 (1, (0.3637647869655924, 0.3868084760167666)),
 (1, (0.5816340434691267, 0.6029681125326132)),
 (1, (0.5149965121094747, 0.5360546828841556)),
 (1, (0.38309268237664784, 0.3991232094191247)),
 (1, (0.7246800898821969, 0.736601374

In [None]:
# For every training diagram keep only the dimension-1 entries, each turned
# from a tuple into a two-element list.
diags = [
    [list(interval) for dim, interval in dgm if dim == 1]
    for dgm in diagrams_train
]

In [None]:
# Sanity check: .any() on np.isnan(...) is True as soon as one entry is NaN.
np.isnan([2,np.nan]).any()

True

In [None]:
# Sanity check: equal-length 1-D arrays combine into a single 2-D array.
np.array([np.array([1,2,3]), np.array([1,2,5])])

array([[1, 2, 3],
       [1, 2, 5]])

In [None]:

# Feature extraction
def _finite_intervals(diagram, homology_dim):
    """Return the finite (birth, death) pairs of one dimension as an (n, 2) array."""
    pairs = np.array(
        [interval for dim, interval in diagram if dim == homology_dim],
        dtype=float,
    ).reshape(-1, 2)  # keeps two columns even when nothing matched
    # NaN and +/-inf coordinates cannot be placed on a landscape grid.
    return pairs[np.isfinite(pairs).all(axis=1)]


def extract_features(diagrams, homology_dim=1, num_landscapes=2, resolution=10,
                     sample_range=None):
    """Vectorise persistence diagrams as flattened persistence landscapes.

    All diagrams of one call are sampled on the SAME grid, so column ``j``
    of the result refers to the same landscape and the same filtration value
    for every sample. (Fitting a separate ``Landscape`` per diagram would
    give each sample its own grid and make the columns incomparable.)

    Parameters
    ----------
    diagrams : sequence
        One diagram per sample, each an iterable of
        ``(dimension, (birth, death))`` entries.
    homology_dim : int, optional
        Only entries of this dimension are used.
    num_landscapes : int, optional
        Number of landscape functions kept per diagram.
    resolution : int, optional
        Number of grid points each landscape is sampled on.
    sample_range : pair of float, optional
        Grid bounds forwarded to ``gd.representations.Landscape``. When
        omitted, the range is left for ``Landscape`` to determine from the
        diagrams of this call; pass the same explicit range for the training
        and the test set to put both on an identical grid.

    Returns
    -------
    numpy.ndarray
        Shape ``(len(diagrams), num_landscapes * resolution)``. A diagram
        without any finite interval of the requested dimension maps to the
        zero vector (its landscape is identically zero).
    """
    cleaned = [_finite_intervals(diagram, homology_dim) for diagram in diagrams]
    features = np.zeros((len(cleaned), num_landscapes * resolution))

    # Empty diagrams are answered with zeros directly instead of being
    # handed to the library.
    non_empty = [idx for idx, pairs in enumerate(cleaned) if len(pairs) > 0]
    if non_empty:
        range_kwargs = {} if sample_range is None else {"sample_range": list(sample_range)}
        landscape = gd.representations.Landscape(
            num_landscapes=num_landscapes,
            resolution=resolution,
            **range_kwargs,
        )
        # A single fit over all diagrams -> one shared sampling grid.
        features[non_empty] = landscape.fit_transform([cleaned[idx] for idx in non_empty])
    return features

# Landscape feature matrices (one row per series) for both splits.
X_train_features = extract_features(diagrams_train)
X_test_features = extract_features(diagrams_test)
#x = x[~numpy.isnan(x)]








In [None]:
# Expect (number of training series, number of landscape features).
X_train_features.shape

(1000, 20)

In [None]:
# Peek at the first few training labels.
y_train[:5]

array([2., 3., 4., 4., 2.])

In [None]:
# Train and evaluate the classifier
#pipeline = make_pipeline(
#    StandardScaler(),
#    SVC(kernel="linear", C=1, random_state=42)
#)

#pipeline.fit(X_train_features, y_train)
#y_pred = pipeline.predict(X_test_features)

#print("Classifier performance:")
#print(classification_report(y_test, y_pred))
#print(f"Accuracy: {accuracy_score(y_test, y_pred)}")


In [None]:
# List the (row, column) index of every NaN entry in X_train_features;
# an empty result means the feature matrix contains no NaN.
np.argwhere(np.isnan(X_train_features))


array([], shape=(0, 2), dtype=int64)

In [None]:
# Inspect the feature vector of a single training sample.
X_train_features[84]

array([0.00809493, 0.02504107, 0.06562863, 0.03430169, 0.0742056 ,
       0.04645601, 0.09990234, 0.07501908, 0.09789763, 0.06701388,
       0.00521814, 0.02381121, 0.05579278, 0.02618212, 0.06404878,
       0.04219966, 0.        , 0.03088376, 0.06143269, 0.01909208])

In [None]:


# Labels of the test split.
y_test


array([2., 3., 1., ..., 2., 2., 1.])

In [None]:
from sklearn.impute import SimpleImputer
from sklearn import svm

# Replace any NaN feature by 0. `fill_value` is only honoured with
# strategy="constant"; under the default strategy ("mean") it is ignored
# and column means would be imputed instead.
imp = SimpleImputer(missing_values=np.nan, strategy="constant", fill_value=0)

# The imputer is fitted on the training features and then applied to both splits.
imp = imp.fit(X_train_features)

X_train_imp = imp.transform(X_train_features)
X_test_imp = imp.transform(X_test_features)

# SVM with scikit-learn's default settings, trained on the imputed features.
clf = svm.SVC()
clf = clf.fit(X_train_imp, y_train)
predictions = clf.predict(X_test_imp)

In [None]:
# Per-class precision/recall/F1 on the test split, followed by overall accuracy.
print("Classifier performance:")
print(classification_report(y_test, predictions))
print(f"Accuracy: {accuracy_score(y_test, predictions)}")

Classifier performance:
              precision    recall  f1-score   support

         1.0       0.26      0.44      0.32      1035
         2.0       0.21      0.09      0.12      1011
         3.0       0.24      0.27      0.26       995
         4.0       0.25      0.18      0.21       959

    accuracy                           0.25      4000
   macro avg       0.24      0.24      0.23      4000
weighted avg       0.24      0.25      0.23      4000

Accuracy: 0.2465


In [None]:
# Predicted labels for the test split.
predictions

array([1., 1., 1., ..., 3., 1., 4.])