# In [4]:
import pandas as pd
import numpy as np

def load_data(file_path, sheet_name):
    """Load one Excel sheet and return it as a DataFrame of numbers.

    Every column is passed through ``pd.to_numeric`` with
    ``errors='coerce'``, so any cell that cannot be parsed as a number
    becomes NaN. All NaN cells (coerced or already missing in the sheet)
    are then replaced with 0.

    Args:
        file_path: Path of the Excel workbook to read.
        sheet_name: Sheet to load; forwarded to ``pd.read_excel``.

    Returns:
        The coerced DataFrame with missing values filled with 0.

    Raises:
        FileNotFoundError: If reading the workbook raises
            ``FileNotFoundError``; re-raised with a message naming the path.
        ValueError: If reading or converting raises ``ValueError``.
        Exception: Any other failure, wrapped with a generic message.

    In every case the original exception is attached as ``__cause__``.
    """
    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name)
        df = df.apply(pd.to_numeric, errors='coerce')
        # fillna is a no-op on the values when nothing is missing, so no
        # separate isnull() scan is needed first.
        return df.fillna(0)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Error: The file at path '{file_path}' was not found."
        ) from err
    except ValueError as ve:
        raise ValueError(f"Value Error: {ve}") from ve
    except Exception as e:
        raise Exception(f"An error occurred: {e}") from e

def extract_vectors(df, index1, index2):
    """Return the rows labelled ``index1`` and ``index2`` as value arrays.

    Rows are selected by index label via ``df.loc``; the result is a
    2-tuple ``(values_of_index1, values_of_index2)``.
    """
    first_row = df.loc[index1]
    second_row = df.loc[index2]
    return first_row.values, second_row.values

def _to_float_array(vector):
    """Return ``vector`` as an ndarray, promoting non-floating dtypes to float."""
    arr = np.asarray(vector)
    if not np.issubdtype(arr.dtype, np.inexact):
        arr = arr.astype(float)
    return arr


def calculate_cosine_similarity(vector1, vector2):
    """Calculate the cosine similarity between two vectors.

    Args:
        vector1: First vector (array-like of numbers).
        vector2: Second vector, same length as ``vector1``.

    Returns:
        ``dot(v1, v2) / (||v1|| * ||v2||)``, or NaN when either vector has
        a norm that is not strictly positive (e.g. an all-zero vector).
    """
    # np.dot computes in the input dtype: integer inputs can overflow
    # silently and boolean inputs reduce to a single True/False, while
    # np.linalg.norm already works in floating point. Promote both inputs
    # first so numerator and denominator use the same arithmetic.
    vec_a = _to_float_array(vector1)
    vec_b = _to_float_array(vector2)

    dot_product = np.dot(vec_a, vec_b)
    norm_a = np.linalg.norm(vec_a)
    norm_b = np.linalg.norm(vec_b)

    if norm_a > 0 and norm_b > 0:
        return dot_product / (norm_a * norm_b)
    return float('nan')  # Direction is undefined for a zero vector

def main():
    """Print the cosine similarity of rows 0 and 1 of the thyroid sheet.

    Loads the hard-coded workbook/sheet, pulls the two rows out as vectors,
    and prints their cosine similarity. Any exception raised along the way
    is reported by printing its message instead of propagating.
    """
    workbook = '/content/Lab Session Data.xlsx'
    sheet = 'thyroid0387_UCI'
    first_row, second_row = 0, 1  # Labels of the two observations to compare

    try:
        frame = load_data(workbook, sheet)
        row_a, row_b = extract_vectors(frame, first_row, second_row)
        score = calculate_cosine_similarity(row_a, row_b)
        print("\nCosine Similarity:", score)
    except Exception as error:
        # FileNotFoundError and ValueError are Exception subclasses and were
        # handled identically, so a single handler covers all three cases.
        print(error)


if __name__ == "__main__":
    main()



# Output:
# Cosine Similarity: 0.9999999999999885
