<a href="https://colab.research.google.com/github/saro0307/Insyt-Your-Intelligent-Data-Analysis-Tool/blob/main1/insyt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, mean_squared_error, r2_score
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display


In [None]:
# Function to display confusion matrix heatmap
def display_confusion_matrix(confusion_mat):
    """Render a confusion matrix as a heatmap with the count written in each cell.

    Args:
        confusion_mat: 2-D array exposing ``.shape`` and ``[row, col]``
            indexing, such as the result of ``confusion_matrix``.
    """
    figure, axes = plt.subplots(figsize=(8, 6))
    axes.imshow(confusion_mat, interpolation="nearest", cmap="Blues")
    axes.set(title="Confusion Matrix", xlabel="Predicted", ylabel="True")

    # imshow puts columns on the x-axis and rows on the y-axis, so the label
    # for cell (row, col) is drawn at x=col, y=row.
    for row in range(confusion_mat.shape[0]):
        for col in range(confusion_mat.shape[1]):
            cell_label = str(confusion_mat[row, col])
            axes.text(col, row, cell_label, ha="center", va="center")

    plt.show()

# Function to display regression predictions
def display_regression_predictions(y_test, y_pred):
    """Scatter predicted values against the true values of a regression.

    Args:
        y_test: True target values, drawn on the x-axis.
        y_pred: Predicted values, drawn on the y-axis.
    """
    _, axes = plt.subplots()
    axes.scatter(y_test, y_pred, color="black")
    axes.set(
        title="Regression Prediction",
        xlabel="True Values",
        ylabel="Predicted Values",
    )
    plt.show()

# Function to display scatter plot
def display_scatter_plot(data, x_col, y_col):
    """Scatter one column of ``data`` against another.

    Args:
        data: Table indexable by column label (``data[x_col]``).
        x_col: Label of the column drawn on the x-axis.
        y_col: Label of the column drawn on the y-axis.
    """
    _, axes = plt.subplots()
    x_values = data[x_col]
    y_values = data[y_col]
    axes.scatter(x_values, y_values, color="blue")
    axes.set(
        title=f"Scatter Plot: {x_col} vs {y_col}",
        xlabel=x_col,
        ylabel=y_col,
    )
    plt.show()

# Function to display PCA results
def display_pca(X, n_components):
    """Fit PCA on ``X``, print the explained variance ratio and plot the projection.

    When at least two components are kept, the first two are plotted against
    each other. When only one component is kept, it is plotted on a single
    horizontal strip instead of failing on the missing second column.

    Args:
        X: Feature matrix passed to ``PCA.fit_transform``.
        n_components: Forwarded unchanged to ``PCA(n_components=...)``.
    """
    pca = PCA(n_components=n_components)
    principal_components = pca.fit_transform(X)
    print(f'Explained Variance Ratio: {pca.explained_variance_ratio_}')

    fig, ax = plt.subplots()
    if principal_components.shape[1] >= 2:
        # Scatter plot of the first two principal components
        ax.scatter(principal_components[:, 0], principal_components[:, 1], color='green')
        ax.set_title("PCA: First Two Components")
        ax.set_xlabel("Principal Component 1")
        ax.set_ylabel("Principal Component 2")
    else:
        # Only one component was kept: there is no second column to index,
        # so draw the points along y = 0 and hide the meaningless y ticks.
        baseline = [0] * len(principal_components)
        ax.scatter(principal_components[:, 0], baseline, color='green')
        ax.set_title("PCA: First Component")
        ax.set_xlabel("Principal Component 1")
        ax.set_yticks([])
    plt.show()

# Train a decision tree classifier, print its metrics and plot the confusion matrix
def _run_classification(X_train, X_test, y_train, y_test):
    model = DecisionTreeClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    confusion_mat = confusion_matrix(y_test, y_pred)

    # Display results
    print("\nAccuracy:", accuracy)
    print("\nClassification Report:\n", report)
    print("\nConfusion Matrix:\n", confusion_mat)
    display_confusion_matrix(confusion_mat)


# Train a linear regression model, print its metrics and plot predictions
def _run_regression(X_train, X_test, y_train, y_test):
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Evaluate the model
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Display results
    print("\nMean Squared Error:", mse)
    print("\nR-squared Score:", r2)
    display_regression_predictions(y_test, y_pred)


# Show a component-count slider and the PCA projection for the encoded features
def _run_pca(X_encoded):
    # PCA cannot keep more components than min(n_samples, n_features); the
    # cap of 10 keeps the slider range manageable.
    max_components = min(X_encoded.shape[0], X_encoded.shape[1], 10)
    if max_components < 2:
        # A slider with max < min is rejected by ipywidgets, and a 2-D plot
        # needs two components anyway.
        print("PCA needs at least two feature columns (after encoding) and two rows. "
              "Please select more columns.")
        return

    n_components = widgets.IntSlider(value=2, min=2, max=max_components, description="PCA Components")
    display(n_components)

    def on_pca_change(change):
        display_pca(X_encoded, change["new"])

    n_components.observe(on_pca_change, names='value')

    # The observer only fires when the slider moves, so render the initial
    # projection explicitly.
    display_pca(X_encoded, n_components.value)


# Main function for data analysis
def main():
    """Run the interactive Insyt Pro workflow.

    Prompts for a CSV path, prints a preview and summary of the dataset, and
    displays widgets for drawing a scatter plot of two columns and for running
    a classification, regression or PCA task on user-selected columns.

    For classification and regression the target column is removed from the
    selected feature columns so the model cannot read the label it is asked
    to predict. PCA ignores the target and uses the selected columns as-is.
    """
    print("Welcome to Insyt Pro: Data Analysis and Visualization Tool")

    # Load the CSV file (surrounding whitespace in the typed path is ignored)
    file_path = input("Enter the path to your CSV file: ").strip()
    dataset = pd.read_csv(file_path)

    # Display the first few rows of the dataset
    print("\nDataset Preview:")
    print(dataset.head())

    # Display the dataset summary
    print("\nDataset Summary:")
    print(dataset.describe())

    # Column labels offered by every selection widget
    all_columns = list(dataset.columns)

    # Dropdowns for selecting X and Y axis columns for scatter plot
    x_dropdown = widgets.Dropdown(options=all_columns, description="X-axis column:")
    y_dropdown = widgets.Dropdown(options=all_columns, description="Y-axis column:")

    display(x_dropdown)
    display(y_dropdown)

    # Button to generate scatter plot
    generate_scatter_button = widgets.Button(description="Generate Scatter Plot")
    display(generate_scatter_button)

    def on_scatter_click(b):
        x_col = x_dropdown.value
        y_col = y_dropdown.value
        print(f"\nSelected X-axis: {x_col}")
        print(f"Selected Y-axis: {y_col}")
        display_scatter_plot(dataset, x_col, y_col)

    generate_scatter_button.on_click(on_scatter_click)

    # Dropdown for selecting target variable
    target_dropdown = widgets.Dropdown(options=all_columns, description="Target variable:")
    display(target_dropdown)

    # MultiSelect widget for selecting columns for analysis
    columns_multiselect = widgets.SelectMultiple(options=all_columns, description="Columns for analysis:")
    display(columns_multiselect)

    # Dropdown for selecting analysis task
    task_dropdown = widgets.Dropdown(options=['Classification', 'Regression', 'PCA'], description="Task:")
    display(task_dropdown)

    # Button for executing analysis
    analyze_button = widgets.Button(description="Run Analysis")
    display(analyze_button)

    def on_analyze_click(b):
        target_column = target_dropdown.value
        selected_columns = list(columns_multiselect.value)
        task = task_dropdown.value.lower()

        if not selected_columns:
            print("Please select at least one column for analysis.")
            return

        if task == 'pca':
            # PCA is unsupervised: no target and no train/test split needed.
            _run_pca(pd.get_dummies(dataset[selected_columns], drop_first=True))
            return

        # Keep the target out of the features; otherwise the model is handed
        # the answer and the reported scores are meaningless.
        feature_columns = [col for col in selected_columns if col != target_column]
        if len(feature_columns) != len(selected_columns):
            print(f"\nNote: target column '{target_column}' was removed from the feature columns.")
        if not feature_columns:
            print("Please select at least one feature column other than the target variable.")
            return

        # Separate features (X) and target variable (y), one-hot encoding
        # categorical feature columns
        X_encoded = pd.get_dummies(dataset[feature_columns], drop_first=True)
        y = dataset[target_column]

        # Split the dataset into training and testing sets
        X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

        if task == 'classification':
            _run_classification(X_train, X_test, y_train, y_test)
        elif task == 'regression':
            _run_regression(X_train, X_test, y_train, y_test)

    analyze_button.on_click(on_analyze_click)

# Start the interactive tool only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


Welcome to Insyt Pro: Data Analysis and Visualization Tool
Enter the path to your CSV file: /content/archive.zip

Dataset Preview:
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       0.68   
2                 15.0                  54.0   0.9970  3.26       0.65   
3                 17.0                  60.0   0.9980  3.16       0.58   
4                 11.0        

Dropdown(description='X-axis column:', options=('fixed acidity', 'volatile acidity', 'citric acid', 'residual …

Dropdown(description='Y-axis column:', options=('fixed acidity', 'volatile acidity', 'citric acid', 'residual …

Button(description='Generate Scatter Plot', style=ButtonStyle())

Dropdown(description='Target variable:', options=('fixed acidity', 'volatile acidity', 'citric acid', 'residua…

SelectMultiple(description='Columns for analysis:', options=('fixed acidity', 'volatile acidity', 'citric acid…

Dropdown(description='Task:', options=('Classification', 'Regression', 'PCA'), value='Classification')

Button(description='Run Analysis', style=ButtonStyle())