In [None]:
class DataAnalyzer:
    """Load a CSV into a DataFrame and run an explore / clean / model workflow.

    Attributes:
        data: The working pandas DataFrame.
        columns: Column index of ``data``; refreshed whenever a method of
            this class replaces ``data`` with a differently shaped frame.
        features: Column selector stored by ``set_features_target``.
        target: Column selector stored by ``set_features_target``.
        model: The fitted estimator, or ``None`` until a model is trained.
    """

    def __init__(self, data_path):
        """Read the CSV at ``data_path`` and reset the modelling state."""
        self.data = pd.read_csv(data_path)
        self.columns = self.data.columns
        self.features = None
        self.target = None
        self.model = None

    def _refresh_columns(self):
        """Keep ``self.columns`` in sync after ``self.data`` is replaced."""
        self.columns = self.data.columns

    def import_data(self, data_path):
        """Replace the working data with the CSV at ``data_path``."""
        self.data = pd.read_csv(data_path)
        # The new file may have different columns than the one loaded before.
        self._refresh_columns()
        print("Data imported successfully")
        print(f"Data shape: {self.data.shape}")

    def show_data(self, n=5):
        """Print the first ``n`` rows of the data."""
        print(self.data.head(n))

    def explore_dataset(self):
        """Print summary statistics and show a seaborn pairplot of the data."""
        # Basic statistics
        print("Basic Statistics:")
        print(self.data.describe())

        # Pairplot for visual exploration
        sns.pairplot(self.data)
        plt.show()

    def show_data_info(self):
        """Print the DataFrame summary produced by ``DataFrame.info``."""
        # info() writes its report itself and returns None, so wrapping it in
        # print() would append a stray "None" line to the output.
        self.data.info()

    def show_column_details(self):
        """Print per-column dtype, cardinality, missing, zero and sign counts.

        Positive/negative counts are printed for numeric columns only and the
        empty-string count for object/string columns only; other dtypes
        (e.g. datetimes) get just the common lines, because ordering
        comparisons against 0 raise ``TypeError`` for them.
        """
        # Read the columns from the live frame so the report is correct even
        # if self.data was reassigned from outside the class.
        columns = self.data.columns
        print("Column details:")
        print(f"Number of columns: {len(columns)}")
        print()
        for column in columns:
            series = self.data[column]
            unique_values = series.unique()
            print(f"Column name: {column}")
            print(f"Column type: {series.dtype}")
            print(f"Number of unique values: {series.nunique()}")
            if len(unique_values) < 10:
                print(f"Unique values: {unique_values}")
            print(f"Number of missing values: {series.isnull().sum()}")
            print(f"Number of zero values: {int((series == 0).sum())}")
            if pd.api.types.is_numeric_dtype(series):
                print(
                    f"Number of positive values: {int((series > 0).sum())}")
                print(
                    f"Number of negative values: {int((series < 0).sum())}")
            elif (pd.api.types.is_object_dtype(series)
                    or pd.api.types.is_string_dtype(series)):
                print(
                    f"Number of empty strings: {int((series == '').sum())}")
            print()

    def drop_missing_values(self):
        """Drop every row that contains at least one missing value."""
        self.data = self.data.dropna()

    def fill_missing_values(self, strategy="mean"):
        """Fill missing values in place according to ``strategy``.

        Args:
            strategy: ``"mean"`` or ``"median"`` fill numeric columns with the
                column statistic (non-numeric columns are left untouched);
                ``"mode"`` fills each column with its first mode;
                ``"zero"`` fills everything with 0 (``"fillna"`` and the
                historical spelling ``"filna"`` are accepted as aliases).

        Raises:
            ValueError: If ``strategy`` is not one of the values above.
        """
        if strategy == "mean":
            # numeric_only avoids a TypeError on frames with text columns.
            fill_values = self.data.mean(numeric_only=True)
        elif strategy == "median":
            fill_values = self.data.median(numeric_only=True)
        elif strategy == "mode":
            modes = self.data.mode()
            if modes.empty:
                # No rows/columns to derive a mode from; nothing to fill.
                return
            # mode() returns a frame (one row per tied mode). Passing that
            # frame to fillna would align on the row index and fill almost
            # nothing, so take the first mode of each column instead.
            fill_values = modes.iloc[0].dropna()
        elif strategy in ("filna", "fillna", "zero"):
            fill_values = 0
        else:
            raise ValueError("Invalid strategy")
        self.data = self.data.fillna(fill_values)

    def merge_dataframes(self, other_df, on_column):
        """Merge ``other_df`` into the data on ``on_column`` via ``pd.merge``."""
        self.data = pd.merge(self.data, other_df, on=on_column)
        self._refresh_columns()
        print("Dataframes merged successfully")

    def join_dataframes(self, other_df, on_column):
        """Join ``other_df`` (indexed by ``on_column``) onto the data."""
        self.data = self.data.join(other_df.set_index(on_column), on=on_column)
        self._refresh_columns()
        print("Dataframes joined successfully")

    def concatenate_dataframes(self, other_df):
        """Append the columns of ``other_df`` to the data (``axis=1``)."""
        self.data = pd.concat([self.data, other_df], axis=1)
        self._refresh_columns()
        print("Dataframes concatenated successfully")

    def set_features_target(self, features, target):
        """Store the feature and target column selectors for later splitting."""
        self.features = features
        self.target = target
        print("Features and target set successfully")

    def split_train_test_data(self, test_size=0.2, random_state=42):
        """Split the selected features/target with ``train_test_split``.

        Returns:
            The tuple ``(X_train, X_test, y_train, y_test)``.

        Raises:
            ValueError: If ``set_features_target`` has not been called.
        """
        if self.features is None or self.target is None:
            raise ValueError(
                "Features and target are not set; "
                "call set_features_target() first")
        X_train, X_test, y_train, y_test = train_test_split(
            self.data[self.features],
            self.data[self.target],
            test_size=test_size,
            random_state=random_state,
        )
        print("Data split successfully")
        return X_train, X_test, y_train, y_test

    def train_linear_regression_model(self, X_train, y_train):
        """Fit a ``LinearRegression`` on the training data and keep it."""
        print("Training model...")
        self.model = LinearRegression()
        self.model.fit(X_train, y_train)
        print("Model trained successfully")

    def predict(self, X_test):
        """Return the stored model's predictions for ``X_test``.

        Raises:
            RuntimeError: If no model has been trained yet.
        """
        if self.model is None:
            raise RuntimeError(
                "No trained model; call train_linear_regression_model() first")
        return self.model.predict(X_test)

    def evaluate_model(self, X_test, y_test):
        """Print regression metrics for the stored model and return them.

        Returns:
            A dict with keys ``"mae"``, ``"mse"``, ``"rmse"``, ``"nrmse"``
            (RMSE divided by the mean of ``y_test``) and ``"r2"``.
        """
        predictions = self.predict(X_test)
        mae = np.mean(np.abs(predictions - y_test))
        mse = mean_squared_error(y_test, predictions)
        rmse = np.sqrt(mse)
        nrmse = rmse / np.mean(y_test)
        r2 = r2_score(y_test, predictions)

        print(f"Mean Absolute Error: {mae}")
        print(f"Mean Squared Error: {mse}")
        print(f"Root Mean Squared Error: {rmse}")
        print(f"Normalized Root Mean Squared Error: {nrmse}")
        print(f"R-squared: {r2}")

        return {"mae": mae, "mse": mse, "rmse": rmse, "nrmse": nrmse, "r2": r2}

    def plot_predictions(self, X_test, y_test):
        """Scatter-plot the model's predictions against the true values."""
        predictions = self.predict(X_test)

        plt.scatter(y_test, predictions)
        plt.xlabel("True Values")
        plt.ylabel("Predictions")
        plt.show()