In [3]:
import pandas as pd
import numpy as np

def add_absolute_z_scores_to_csv(input_file, output_file,
                                 score_column='Anomaly_Score',
                                 z_column='z_scores'):
    """
    Reads a CSV file, calculates absolute z-scores for a score column,
    and saves the DataFrame with the z-scores as an extra column to a CSV file.

    The z-score of each row is ``abs((score - mean) / std)`` where ``std`` is
    the population standard deviation (``ddof=0``) of the score column.
    When the standard deviation is exactly 0 (all scores identical) every
    z-score is set to ``0.0`` instead of dividing by zero.

    Errors are reported with ``print`` rather than raised: a missing score
    column, a missing input file, or any other exception results in a
    message on stdout and no output file being written by this call.

    Parameters:
        input_file (str): Path to the input CSV file.
        output_file (str): Path to save the output CSV file. May be the same
            path as ``input_file``, in which case the input is overwritten.
        score_column (str): Name of the column holding the scores.
            Defaults to 'Anomaly_Score'.
        z_column (str): Name of the column that receives the absolute
            z-scores. Defaults to 'z_scores'. An existing column with this
            name is replaced.

    Returns:
        None
    """
    try:
        # Read the CSV file
        data = pd.read_csv(input_file)

        # Bail out early with a clear message if the score column is absent
        if score_column not in data.columns:
            print(f"Error: '{score_column}' column not found in the input file.")
            return

        scores = data[score_column]
        mean = scores.mean()
        # ddof=0 -> population standard deviation (what np.std computes by default)
        std_dev = scores.std(ddof=0)

        if std_dev == 0:
            # All scores are identical: avoid division by zero. Use a float so
            # the column has the same dtype as in the regular branch.
            data[z_column] = 0.0
        else:
            # Absolute distance from the mean, in units of standard deviation
            data[z_column] = ((scores - mean) / std_dev).abs()

        # Save the modified DataFrame (without the index) to the output CSV file
        data.to_csv(output_file, index=False)
        print(f"File with absolute z-scores saved successfully to {output_file}")

    except FileNotFoundError:
        print("Error: Input file not found. Please check the file path.")
    except Exception as e:
        # Best-effort notebook helper: report any other failure instead of raising
        print(f"An error occurred: {e}")

# Example usage: pass the path of the CSV to read and the path to write.
# Both arguments point at the same file here, so the z-score column is
# written back into the input CSV.
add_absolute_z_scores_to_csv(
    input_file=r"C:\Users\PRANAV\OneDrive\Desktop\fedl\3 clients\x_part3.csv",
    output_file=r"C:\Users\PRANAV\OneDrive\Desktop\fedl\3 clients\x_part3.csv",
)


File with absolute z-scores saved successfully to C:\Users\PRANAV\OneDrive\Desktop\fedl\3 clients\x_part3.csv


In [44]:
import pandas as pd

def add_labels_for_multiple_thresholds(input_file, thresholds,
                                       score_column='scores_c5',
                                       label_column='c5_labels'):
    """
    Reads a CSV file, compares a score column with a set of thresholds,
    and writes 0/1 label columns back to the same file.

    A row is labelled 1 when its score is strictly greater than the
    threshold and 0 otherwise (missing scores compare as not greater and
    therefore get 0).

    Column naming:
        * exactly one threshold  -> labels go to ``label_column``;
        * several thresholds     -> one column per threshold, named
          ``f"{label_column}_{threshold}"``, so that later thresholds do not
          overwrite the labels of earlier ones.

    The file is rewritten once, after all label columns have been added.
    If ``thresholds`` is empty the file is left untouched.

    Errors are reported with ``print`` rather than raised.

    Parameters:
        input_file (str): Path to the CSV file; it is modified in place.
        thresholds (list of float): Threshold values for labeling anomalies.
        score_column (str): Column holding the scores. Defaults to 'scores_c5'.
        label_column (str): Name (or name prefix, when several thresholds are
            given) of the label column. Defaults to 'c5_labels'.

    Returns:
        None
    """
    try:
        # Read the input CSV file
        data = pd.read_csv(input_file)

        # Fail with a readable message instead of a bare KeyError text
        if score_column not in data.columns:
            print(f"Error: '{score_column}' column not found in the input file.")
            return

        thresholds = list(thresholds)
        if not thresholds:
            # Nothing to apply; do not rewrite the file
            return

        # With several thresholds each one needs its own column, otherwise
        # only the labels of the last threshold would survive.
        one_column_per_threshold = len(thresholds) > 1
        scores = data[score_column]
        for threshold in thresholds:
            if one_column_per_threshold:
                column = f"{label_column}_{threshold}"
            else:
                column = label_column
            data[column] = (scores > threshold).astype(int)

        # Save once, after every label column has been added
        data.to_csv(input_file, index=False)
        for threshold in thresholds:
            print(f"Threshold {threshold} applied and saved to {input_file}")

    except FileNotFoundError:
        print("Error: Input file not found. Please check the file path.")
    except Exception as e:
        # Best-effort notebook helper: report any other failure instead of raising
        print(f"An error occurred: {e}")

# Example usage: the first argument is the CSV to label in place, the second
# is the list of thresholds to compare the scores against.
add_labels_for_multiple_thresholds(r"/home/saipranav/Desktop/fedl/tt4/x_part6.csv", [0.5382])


Threshold 0.5382 applied and saved to /home/saipranav/Desktop/fedl/tt4/x_part6.csv


In [37]:
import pandas as pd

def calculate_anomaly_stats(file_path, score_column='Anomaly_Score'):
    """
    Computes the mean and standard deviation of a score column in a CSV file.

    Both values are rounded to 6 decimal places. The standard deviation is
    the pandas default for ``Series.std()``, i.e. the sample standard
    deviation (``ddof=1``).

    Parameters:
        file_path (str): Path to the CSV file.
        score_column (str): Name of the column to summarise.
            Defaults to 'Anomaly_Score'.

    Returns:
        tuple: ``(mean, std_dev)`` on success.
        str: the error message if anything goes wrong (file cannot be read,
            column missing, non-numeric data, ...). No exception is raised,
            so callers must check for a ``str`` result before unpacking.
    """
    try:
        # Load the CSV file
        data = pd.read_csv(file_path)

        # Report a missing column with a descriptive message; it is turned
        # into the returned error string by the handler below.
        if score_column not in data.columns:
            raise ValueError(
                f"The column '{score_column}' does not exist in the CSV file."
            )

        # Calculate mean and standard deviation
        scores = data[score_column]
        mean_score = round(scores.mean(), 6)
        std_dev_score = round(scores.std(), 6)

        return mean_score, std_dev_score

    except Exception as e:
        # Existing contract: failures are returned as text, not raised
        return str(e)

# Example usage
file_path = r"/home/saipranav/Desktop/fedl/tt4/x_part5.csv" # Replace with your CSV file path
stats = calculate_anomaly_stats(file_path)
if isinstance(stats, str):
    # calculate_anomaly_stats returns the error text (not a tuple) on failure;
    # unpacking that string directly would raise a misleading ValueError.
    print(f"Could not compute anomaly statistics: {stats}")
else:
    mean, std_dev = stats
    # Mean plus seven standard deviations of the scores
    result = mean+(std_dev*7)
    print(result)
    print(f"Mean of anomaly_scores: {mean}")
    print(f"Standard deviation of anomaly_scores: {std_dev}")


0.5382
Mean of anomaly_scores: 0.353484
Standard deviation of anomaly_scores: 0.026388
