<a href="https://colab.research.google.com/github/SAMYSOSERIOUS/Stats-analytics/blob/main/CodeTabPyExp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Import libraries


import pandas as pd


from sklearn.preprocessing import StandardScaler


from sklearn.svm import OneClassSVM





# Function to filter IT department employees


def filter_it_department(df):
    """Return the rows of ``df`` whose 'Department' value is exactly 'IT'.

    Args:
        df: DataFrame with a 'Department' column.

    Returns:
        A new DataFrame holding only the matching rows; the original index
        labels are preserved and ``df`` itself is not modified.

    Raises:
        KeyError: if ``df`` has no 'Department' column.
    """
    it_mask = df['Department'] == 'IT'
    # Explicit copy: callers add columns to the result, and assigning onto a
    # bare boolean-mask slice triggers pandas' SettingWithCopyWarning.
    return df[it_mask].copy()





# Function to compute the average salary


def compute_average_salary(df):
    """Attach the mean of the 'Salary' column to every row.

    Args:
        df: DataFrame with a numeric 'Salary' column.

    Returns:
        A new DataFrame equal to ``df`` plus an 'Average_Salary' column whose
        value on every row is ``df['Salary'].mean()``. The input frame is left
        unmodified.

    Raises:
        KeyError: if ``df`` has no 'Salary' column.
    """
    # assign() returns a new frame, so the caller's DataFrame is not mutated
    # and no chained-assignment warning is raised on filtered slices.
    return df.assign(Average_Salary=df['Salary'].mean())





# Function to find the oldest employee


def find_oldest_employee(df):
    """Annotate every row with the name and department of the oldest employee.

    Args:
        df: DataFrame with 'Age', 'Name' and 'Department' columns.

    Returns:
        A new DataFrame equal to ``df`` plus the columns
        'Oldest_Employee_Name' and 'Oldest_Employee_Dept', each holding the
        same value on every row (taken from the first row with the maximum
        'Age'). When ``df`` is empty or every 'Age' is missing, both columns
        are filled with ``None``. The input frame is left unmodified.

    Raises:
        KeyError: if a required column is missing.
    """
    result = df.copy()
    ages = result['Age']

    # ``all()`` over an empty Series is True, so this also covers an empty
    # frame, where looking up a maximum would otherwise raise.
    if ages.isna().all():
        result['Oldest_Employee_Name'] = None
        result['Oldest_Employee_Dept'] = None
        return result

    # Positional lookup: a label-based ``loc`` would return several rows when
    # index labels are duplicated, breaking the scalar broadcast below.
    oldest = result.iloc[ages.argmax()]
    result['Oldest_Employee_Name'] = oldest['Name']
    result['Oldest_Employee_Dept'] = oldest['Department']
    return result





# Function to detect outliers (adapted for a dataset with 'date' and 'amount' columns)


def detect_outliers(input_df, outliers_fraction=0.05, gamma=0.1):
    """Flag days whose total 'amount' a One-Class SVM classifies as an outlier.

    The rows are summed per 'date', the daily totals are standardised, and an
    RBF-kernel ``OneClassSVM`` is fitted on and applied to those same totals.

    Args:
        input_df: DataFrame with 'date' and 'amount' columns.
        outliers_fraction: Passed to ``OneClassSVM`` as ``nu``.
        gamma: RBF kernel coefficient passed to ``OneClassSVM``.

    Returns:
        A DataFrame with one row per distinct date and the columns 'date',
        'amount' (the daily sum) and 'anomaly' (True where the model predicted
        -1). An empty input yields an empty frame with those three columns.

    Raises:
        KeyError: if 'date' or 'amount' is missing from ``input_df``.
    """
    # Nothing to fit on: the scaler would raise on zero samples.
    if input_df.empty:
        no_days = input_df[['date', 'amount']].reset_index(drop=True)
        no_days['anomaly'] = pd.Series([], dtype=bool, index=no_days.index)
        return no_days

    # One row per day holding the summed amount, ordered by date.
    daily = pd.pivot_table(
        input_df.sort_values('date'),
        index='date',
        values='amount',
        aggfunc='sum',
    ).reset_index()

    # Standardise the single feature before fitting the SVM.
    scaled = StandardScaler().fit_transform(daily[['amount']])

    model = OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=gamma)
    model.fit(scaled)

    # predict() labels outliers as -1; expose that as a boolean column.
    daily['anomaly'] = model.predict(scaled) == -1
    return daily





# Function to define the output schema for Tableau Prep


def get_output_schema():
    """Describe the columns and types that ``script`` returns to Tableau Prep.

    The ``prep_*`` type helpers are not defined in this file; they are
    presumably injected by the Tableau Prep / TabPy runtime (confirm against
    the deployment environment).

    Returns:
        A DataFrame whose columns are the output field names and whose values
        are the corresponding Tableau Prep type markers.
    """
    return pd.DataFrame({
        'ID': prep_int(),
        'Name': prep_string(),
        'Age': prep_int(),
        'Salary': prep_int(),
        'Department': prep_string(),
        # Produced by Series.mean(), which yields a float, so the field is
        # declared as a decimal rather than an integer.
        'Average_Salary': prep_decimal(),
        'Oldest_Employee_Name': prep_string(),
        'Oldest_Employee_Dept': prep_string(),
    })





# Main function for Tableau Prep


def script(df):
    """Entry point for Tableau Prep: run the employee pipeline on ``df``.

    The incoming frame is passed through three stages in order: keep only the
    IT department rows, attach the average salary, then attach the oldest
    employee's name and department.

    Args:
        df: Input DataFrame supplied by the flow.

    Returns:
        The DataFrame produced by the final stage.
    """
    stages = (
        filter_it_department,
        compute_average_salary,
        find_oldest_employee,
    )

    frame = df
    for stage in stages:
        frame = stage(frame)
    return frame