In [6]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression


class AnalysisDataAndataitLinearRegression:
    """Summarise a real-estate CSV and fit a linear regression on ``Price``.

    The code reads the columns ``Price``, ``Bedroom``, ``Space``, ``Room``,
    ``Lot``, ``Tax``, ``Bathroom``, ``Garage`` and ``Condition``; every column
    other than ``Price`` is used as a regression feature.
    """

    # Feature values of the single house whose price is predicted after fitting.
    _HOUSE_TO_PRICE = {
        'Bedroom': 3,
        'Space': 1500,
        'Room': 8,
        'Lot': 40,
        'Tax': 1000,
        'Bathroom': 2,
        'Garage': 1,
        'Condition': 0,
    }

    def __init__(self):
        self.version = 1

    def analyse_and_fit_lrm(self, path):
        """Load the dataset at ``path``, summarise it and fit the regression.

        Parameters
        ----------
        path : str or path-like
            Location of the CSV file (e.g. ``"./data/realest.csv"``); it is
            passed unchanged to ``pd.read_csv``.

        Returns
        -------
        dict
            ``"summary_dict"`` holds:

            * ``"statistics"`` - ``[mean, std, median, min, max]`` of ``Tax``
              over rows with ``Bedroom == 4`` and ``Bathroom == 2``;
            * ``"data_frame"`` - rows with ``Space > 800`` sorted by ``Price``
              in descending order;
            * ``"number_of_observations"`` - number of rows whose ``Lot`` is
              at least the 80th percentile of ``Lot``.

            ``"regression_dict"`` holds:

            * ``"model_parameters"`` - ``"Intercept"`` plus one coefficient
              per feature column, keyed by column name;
            * ``"price_prediction"`` - the array returned by
              ``model.predict`` for the house in ``_HOUSE_TO_PRICE``.

        Raises
        ------
        KeyError
            If a column the analysis needs is missing from the dataset, or if
            the dataset has a feature column that ``_HOUSE_TO_PRICE`` does not
            provide a value for.
        """
        data = pd.read_csv(path)

        # The descriptive summary is computed on the raw rows; only the
        # regression works on the NaN-free subset.
        return {
            "summary_dict": self.__summarise(data),
            "regression_dict": self.__fit_and_predict(
                self.__listwise_deletion(data)
            ),
        }

    def __summarise(self, data: pd.DataFrame):
        """Build the descriptive part of the answer from the raw data."""
        tax = data.loc[(data["Bedroom"] == 4) & (data["Bathroom"] == 2), "Tax"]
        statistics = [tax.mean(), tax.std(), tax.median(), tax.min(), tax.max()]

        large_homes = data[data["Space"] > 800].sort_values(
            by="Price", ascending=False
        )

        lot_threshold = data["Lot"].quantile(4 / 5)

        return {
            'statistics': statistics,
            'data_frame': large_homes,
            'number_of_observations': (data["Lot"] >= lot_threshold).sum(),
        }

    def __fit_and_predict(self, data: pd.DataFrame):
        """Fit ``Price`` on all remaining columns and price the query house."""
        target = "Price"
        features = data.drop(columns=target)

        model = LinearRegression()
        model.fit(features, data[target])

        # Label each coefficient with the column it was fitted on instead of
        # assuming a fixed column order in the CSV.
        model_parameters = {'Intercept': model.intercept_}
        model_parameters.update(zip(features.columns, model.coef_))

        # Present the query in exactly the fitted column order so every value
        # is matched with the right coefficient.
        house = pd.DataFrame([self._HOUSE_TO_PRICE]).loc[:, features.columns]

        return {
            "model_parameters": model_parameters,
            "price_prediction": model.predict(house),
        }

    def __listwise_deletion(self, data: pd.DataFrame):
        """Return ``data`` without the rows that contain any missing value."""
        return data.dropna()


In [7]:
# Instantiate the analysis helper defined in the previous cell.
test = AnalysisDataAndataitLinearRegression()

# Run the summary and regression on the CSV; as the last expression of the
# cell, the returned dict is what the notebook displays as this cell's output.
test.analyse_and_fit_lrm('realest.csv')

{'summary_dict': {'statistics': [443.6,
   23.28733561401991,
   440.0,
   418.0,
   470.0],
  'data_frame':      Price  Bedroom   Space  Room   Lot     Tax  Bathroom  Garage  Condition
  114   90.0      8.0  2293.0  12.0  50.0  1181.0       3.0     2.0        0.0
  88    88.0      8.0  2228.0  12.0  50.0  1208.0       3.0     2.0        0.0
  36    88.0      8.0  2278.0  12.0  50.0  1183.0       3.0     2.0        0.0
  140   88.0      7.0  2277.0  12.0  50.0  1248.0       3.0     2.0        0.0
  62    85.0      7.0  2295.0  12.0  50.0  1233.0       3.0     2.0        0.0
  ..     ...      ...     ...   ...   ...     ...       ...     ...        ...
  45    35.0      5.0  1142.0   7.0  25.0   543.0       1.5     0.0        0.0
  71    34.0      4.0  1095.0   7.0  25.0   530.0       1.5     0.0        0.0
  97    34.0      4.0  1087.0   7.0  25.0   584.0       1.5     0.0        0.0
  149   32.0      4.0  1065.0   7.0  25.0   492.0       1.5     0.0        0.0
  156    NaN      NaN   