In [1]:
import pandas as pd
from sklearn.decomposition import PCA
import numpy as np
from scipy.optimize import linprog


class RationalClassifier:
    def __init__(self, numerator_degree, denominator_degree, n_components, delta=1e-5, precision=1e-6):
        self.numerator_degree = numerator_degree
        self.denominator_degree = denominator_degree
        self.n_components = n_components
        self.delta = delta
        self.precision = precision
        self.alpha = None
        self.beta = None
        self.z = None

    def fit(self, X, y):
        """
        Train the rational classifier by solving for optimal alpha, beta, and z.
        """
        # Generate rational function matrices
        G_matrix, H_matrix = generate_rational_function_matrix(
            self.numerator_degree, self.denominator_degree, self.n_components, X
        )
        
        # Use bisection method to find optimal z
        self.z = bisection_method(
            uL=0, uH=100, G_matrix=G_matrix, H_matrix=H_matrix, y=y, precision=self.precision
        )
        
        # Solve for alpha and beta at the optimal z
        result = solve_lp(self.z, G_matrix, H_matrix, y, delta=self.delta)
        if not result.success:
            raise ValueError("Linear programming failed to converge.")
         
        
        # Extract alpha and beta coefficients
        self.alpha = result.x[1 : 1 + G_matrix.shape[1]]
        self.beta = result.x[1 + G_matrix.shape[1] :]

    def predict(self, X):
        """
        Predict class labels for input data X.
        """
        # Generate rational function matrices for prediction
        G_matrix, H_matrix = generate_rational_function_matrix(
            self.numerator_degree, self.denominator_degree, self.n_components, X
        )
        
        # Compute rational function values
        numerator = np.dot(G_matrix, self.alpha)
        denominator =  1e-5+ np.dot(H_matrix, self.beta)
        rational_values = numerator / denominator
        
        # Apply threshold to classify
        predictions = (rational_values > 0.3).astype(int)
        return predictions


# Generating multi-indices
def r_multi_indices(n, d):
    if n == 1:
        yield (d,)
    else:
        for k in range(d + 1):
            for c in r_multi_indices(n - 1, k):
                yield (d - k, *c)


def generate_multi_indices(n, d):
    from itertools import chain
    return list(chain(*[list(r_multi_indices(n, _)) for _ in range(d + 1)]))


# Generating rational function matrices
def generate_rational_function_matrix(numerator_degree, denominator_degree, n_components, dataset):
    G_indices = generate_multi_indices(n_components, numerator_degree)
    H_indices = generate_multi_indices(n_components, denominator_degree)

    G_matrix = np.zeros((len(dataset), len(G_indices)))
    H_matrix = np.zeros((len(dataset), len(H_indices)))

    for i, data_point in enumerate(dataset):
        G_matrix[i] = [np.prod([data_point[k] ** idx[k] for k in range(len(data_point))]) for idx in G_indices]
        H_matrix[i] = [np.prod([data_point[k] ** idx[k] for k in range(len(data_point))]) for idx in H_indices]

    return G_matrix, H_matrix


# Linear programming setup
def solve_lp(z, G_matrix, H_matrix, y, delta=1e-5):
    length_dataset = len(y)
    num_alpha = G_matrix.shape[1]
    num_beta = H_matrix.shape[1]

    c = [1] + [0] * (num_alpha + num_beta)
    
    A_ub, b_ub = [], []

    for i in range(length_dataset):
        G_row = G_matrix[i]
        H_row = H_matrix[i]
        y_i = y[i]

        # First constraint
        A_ub.append([-1] + [-g for g in G_row] + [(y_i - z) * h for h in H_row])
        b_ub.append(0)

        # Second constraint
        A_ub.append([-1] + [g for g in G_row] + [-(y_i - z) * h for h in H_row])
        b_ub.append(0)

        # Positivity constraint
        A_ub.append([0] + [0] * num_alpha + [-h for h in H_row])
        b_ub.append(-delta)

    bounds = [(0, None)] + [(None, None)] * (num_alpha + num_beta)
    result = linprog(c=c, A_ub=np.array(A_ub), b_ub=np.array(b_ub), bounds=bounds, method="highs")
   # print(f"Optimization result: {result}")
    return result

# Bisection method to find optimal z
#def bisection_method(uL, uH, G_matrix, H_matrix, y, precision=1e-6):
   # while (uH - uL) > precision:
     #   z = (uH + uL) / 2
      #  result = solve_lp(z, G_matrix, H_matrix, y)
      #  if result.success:
        #    uH = z
       # else:
         #   uL = z
  #  return uH
def bisection_method(uL, uH, G_matrix, H_matrix, y, precision=1e-6):
  
    while (uH - uL) > precision:
        z = (uH + uL) / 2
        result = solve_lp(z, G_matrix, H_matrix, y)
      
        if result.success:
            uH = z
        else:
            uL = z

    return uH


In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Set random seed for reproducibility
np.random.seed(42)

# Generate 100 samples with 10 features each, as pixel intensities (random between 0 and 1)
X = np.random.rand(100, 10)

# Generate binary labels (0 or 1)
y = np.random.randint(0, 2, size=100)

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Show the first 20 rows of the dataset
df = pd.DataFrame(X, columns=[f'feature_{i+1}' for i in range(10)])
df['label'] = y

print("Dataset (first 20 rows):")
print(df.head(20))

# Show the train and test splits (you can adjust the print below to check the training set or test set)
print("\nTraining Set:")
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")

print("\nTesting Set:")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")


Dataset (first 20 rows):
    feature_1  feature_2  feature_3  feature_4  feature_5  feature_6  \
0    0.374540   0.950714   0.731994   0.598658   0.156019   0.155995   
1    0.020584   0.969910   0.832443   0.212339   0.181825   0.183405   
2    0.611853   0.139494   0.292145   0.366362   0.456070   0.785176   
3    0.607545   0.170524   0.065052   0.948886   0.965632   0.808397   
4    0.122038   0.495177   0.034389   0.909320   0.258780   0.662522   
5    0.969585   0.775133   0.939499   0.894827   0.597900   0.921874   
6    0.388677   0.271349   0.828738   0.356753   0.280935   0.542696   
7    0.772245   0.198716   0.005522   0.815461   0.706857   0.729007   
8    0.863103   0.623298   0.330898   0.063558   0.310982   0.325183   
9    0.119594   0.713245   0.760785   0.561277   0.770967   0.493796   
10   0.031429   0.636410   0.314356   0.508571   0.907566   0.249292   
11   0.289751   0.161221   0.929698   0.808120   0.633404   0.871461   
12   0.807440   0.896091   0.318003   0

In [15]:

from sklearn.metrics import accuracy_score, f1_score, precision_score, confusion_matrix,recall_score

# Initialize RationalClassifier with chosen parameters
numerator_degree = 2
denominator_degree = 2
n_components = 10  # Number of components (can be adjusted)

classifier = RationalClassifier(numerator_degree=numerator_degree,
                               denominator_degree=denominator_degree,
                               n_components=n_components)

# Train the classifier on the dataset
classifier.fit(X_train, y_train)

# Get predictions from the RationalClassifier
rational_predictions = classifier.predict(X_test)

print("Alpha coefficients:", classifier.alpha)
print("Beta coefficients:", classifier.beta)
print("Optimal z value:", classifier.z)


# Calculate precision, recall, and F1 score for both classes (0 and 1)
accuracy = accuracy_score(y_test, rational_predictions)
precision = precision_score(y_test, rational_predictions)
recall = recall_score(y_test, rational_predictions)
f1 = f1_score(y_test, rational_predictions)

# Print precision, recall, and F1 score
print(f"accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-score: {f1}")





Alpha coefficients: [ 1.55180743e-04  0.00000000e+00  3.37086189e-04  0.00000000e+00
 -6.88056056e-04  0.00000000e+00  8.07643011e-04  5.34456231e-05
 -1.71219184e-03  3.90154473e-04 -3.75665249e-04  9.46964107e-04
  1.14622456e-03 -1.23012361e-03 -1.13749179e-03  8.15511208e-04
 -7.66542723e-04 -7.22546658e-04 -1.69187529e-04  5.79402149e-04
 -6.41701179e-04  9.24524284e-04 -1.26766699e-03 -5.08333707e-04
 -1.23923979e-03  6.74006434e-04  1.01206994e-05 -1.10004754e-03
 -5.19508805e-04 -4.46380257e-04  7.06530363e-04  3.84173778e-04
  6.25532406e-04 -6.27317057e-04 -1.02350505e-03  1.92688897e-03
  3.24057291e-04  3.50597391e-04  1.25523258e-03 -2.65860262e-04
 -4.09054248e-04 -2.07176380e-04  1.00776107e-03  5.17843919e-06
 -5.40465632e-06  1.28966609e-04 -5.49211949e-04  0.00000000e+00
  3.13741880e-04  5.50813898e-04 -1.98209721e-04  2.32032625e-04
 -9.11754214e-04  8.09213775e-04 -1.28582878e-05 -3.59974852e-04
  8.02864708e-04  8.26354531e-04 -6.30575291e-04  7.14164957e-04
 -1.8

In [11]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, confusion_matrix,recall_score


# Train a Logistic Regression model with class weights
model = LogisticRegression(class_weight='balanced', random_state=42)
model.fit(X_train, y_train)

# Get predictions for the test set
y_pred = model.predict(X_test)

# Calculate precision, recall, and F1 score for both classes (0 and 1)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
# Print precision, recall, and F1 score
print(f"accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-score: {f1}")





accuracy: 0.5
Precision: 0.6
Recall: 0.2727272727272727
F1-score: 0.375


In [13]:
# Optionally, compare the RationalClassifier predictions with the Logistic Regression predictions
comparison_df = pd.DataFrame({
    'Logistic Regression Prediction': y_pred[:20],
    'RationalClassifier Prediction': rational_predictions[:20],
    'Actual': y_test[:20]
})

print("\nPrediction Comparison (first 20):\n")
print(comparison_df)


Prediction Comparison (first 20):

    Logistic Regression Prediction  RationalClassifier Prediction  Actual
0                                1                              0       1
1                                0                              1       1
2                                0                              0       0
3                                0                              1       1
4                                0                              0       1
5                                0                              1       0
6                                1                              1       1
7                                0                              1       0
8                                1                              0       0
9                                0                              0       0
10                               0                              1       0
11                               0                              0       0
12