# Importing Libraries

In [2]:
import pandas as pd
import numpy as np

### DecisionTree using Python Function

In [10]:
class DecisionTreeClassifier:
    def __init__(self, max_depth=5, min_samples_split=2):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.tree = None

    def gini_index(self, groups, classes):
        """Calculate Gini Index for a split."""
        n_instances = float(sum(len(group) for group in groups))
        gini = 0.0
        for group in groups:
            size = float(len(group))
            if size == 0:
                continue
            score = 0.0
            for class_val in classes:
                proportion = (group[:, -1] == class_val).sum() / size
                score += proportion ** 2
            gini += (1.0 - score) * (size / n_instances)
        return gini

    def test_split(self, index, value, dataset):
        """Split a dataset based on a feature and value."""
        left, right = [], []
        for row in dataset:
            if row[index] < value:
                left.append(row)
            else:
                right.append(row)
        return np.array(left), np.array(right)

    def get_split(self, dataset):
        """Find the best split for a dataset."""
        class_values = list(set(row[-1] for row in dataset))
        best_index, best_value, best_score, best_groups = 999, 999, float("inf"), None
        for index in range(dataset.shape[1] - 1):
            for row in dataset:
                groups = self.test_split(index, row[index], dataset)
                gini = self.gini_index(groups, class_values)
                if gini < best_score:
                    best_index, best_value, best_score, best_groups = index, row[index], gini, groups
        return {"index": best_index, "value": best_value, "groups": best_groups}

    def to_terminal(self, group):
        """Create a terminal node."""
        outcomes = [row[-1] for row in group]
        return max(set(outcomes), key=outcomes.count)

    def split(self, node, depth):
        """Create child splits for a node."""
        left, right = node["groups"]
        del node["groups"]

        # Handle cases where either left or right group is empty
        if len(left) == 0:
            node["left"] = node["right"] = self.to_terminal(right)
            return
        if len(right) == 0:
            node["left"] = node["right"] = self.to_terminal(left)
            return

        # Check for max depth
        if depth >= self.max_depth:
            node["left"], node["right"] = self.to_terminal(left), self.to_terminal(right)
            return

        # Process left child
        if len(left) <= self.min_samples_split:
            node["left"] = self.to_terminal(left)
        else:
            node["left"] = self.get_split(left)
            self.split(node["left"], depth + 1)

        # Process right child
        if len(right) <= self.min_samples_split:
            node["right"] = self.to_terminal(right)
        else:
            node["right"] = self.get_split(right)
            self.split(node["right"], depth + 1)

    def build_tree(self, train):
        """Build the decision tree."""
        root = self.get_split(train)
        self.split(root, 1)
        return root

    def fit(self, X, y):
        """Fit the decision tree to the data."""
        dataset = np.column_stack((X, y))
        self.tree = self.build_tree(dataset)

    def predict_row(self, node, row):
        """Make a prediction for a single row."""
        if row[node["index"]] < node["value"]:
            if isinstance(node["left"], dict):
                return self.predict_row(node["left"], row)
            else:
                return node["left"]
        else:
            if isinstance(node["right"], dict):
                return self.predict_row(node["right"], row)
            else:
                return node["right"]

    def predict(self, X):
        """Make predictions for a dataset."""
        return [self.predict_row(self.tree, row) for row in X]

# Load your dataset
df = pd.read_csv("C:\\Users\\DELL8\\OneDrive\\Pictures\\train.csv", usecols=["Age", "Fare"])
df = df.dropna()  # Drop rows with missing values

# Create a target column for demonstration purposes
# For example: classify as 1 if Fare > median, else 0
df['Target'] = (df['Fare'] > df['Fare'].median()).astype(int)

# Prepare features and labels
X = df[["Age", "Fare"]].values
y = df["Target"].values

# Train the decision tree
tree = DecisionTreeClassifier(max_depth=3)
tree.fit(X, y)

# Make predictions
predictions = tree.predict(X)



In [11]:
# Output results
df["Predictions"] = predictions
print(df)


      Age     Fare  Target  Predictions
0    22.0   7.2500       0          0.0
1    38.0  71.2833       1          1.0
2    26.0   7.9250       0          0.0
3    35.0  53.1000       1          1.0
4    35.0   8.0500       0          0.0
..    ...      ...     ...          ...
885  39.0  29.1250       1          1.0
886  27.0  13.0000       0          0.0
887  19.0  30.0000       1          1.0
889  26.0  30.0000       1          1.0
890  32.0   7.7500       0          0.0

[714 rows x 4 columns]


###  LinearRegression Using Python Function

In [13]:
class LinearRegression:
    def __init__(self):
        self.coefficients = None

    def fit(self, X, y):
        """Fit the linear regression model using the normal equation."""
        # Add a bias term (column of ones) to X
        X = np.column_stack((np.ones(X.shape[0]), X))
        
       
        X_transpose = X.T
        self.coefficients = np.linalg.inv(X_transpose @ X) @ X_transpose @ y

    def predict(self, X):
        """Make predictions using the linear regression model."""
        
        X = np.column_stack((np.ones(X.shape[0]), X))
        
        # Predict using the coefficients
        return X @ self.coefficients

# Load your dataset
df = pd.read_csv("C:\\Users\\DELL8\\OneDrive\\Pictures\\train.csv", usecols=["Age", "Fare"])
df = df.dropna() 
df['Target'] = df['Fare']


X = df[["Age"]].values  
y = df["Target"].values 

model = LinearRegression()
model.fit(X, y)

# Make predictions
predictions = model.predict(X)




In [14]:
df["Predictions"] = predictions
print(df)

      Age     Fare   Target  Predictions
0    22.0   7.2500   7.2500    32.000102
1    38.0  71.2833  71.2833    37.599521
2    26.0   7.9250   7.9250    33.399957
3    35.0  53.1000  53.1000    36.549630
4    35.0   8.0500   8.0500    36.549630
..    ...      ...      ...          ...
885  39.0  29.1250  29.1250    37.949485
886  27.0  13.0000  13.0000    33.749921
887  19.0  30.0000  30.0000    30.950211
889  26.0  30.0000  30.0000    33.399957
890  32.0   7.7500   7.7500    35.499739

[714 rows x 4 columns]


### Gradient Boosting Regressor  Using Python Function

In [17]:
class GradientBoostingRegressor:
    def __init__(self, n_estimators=100, learning_rate=0.1, max_depth=3):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.models = []

    def fit(self, X, y):
        """Fit the Gradient Boosting model."""
       
        predictions = np.full(y.shape, y.mean())
        self.models = []
        
        for _ in range(self.n_estimators):
           
            residuals = y - predictions
            
          
            tree = DecisionTreeRegressor(max_depth=self.max_depth)
            tree.fit(X, residuals)
            self.models.append(tree)
            
            # Update predictions
            predictions += self.learning_rate * tree.predict(X)

    def predict(self, X):
        """Make predictions using the Gradient Boosting model."""
       
        predictions = np.full((X.shape[0],), 0.0)
        
        for model in self.models:
            predictions += self.learning_rate * model.predict(X)
        
        return predictions

class DecisionTreeRegressor:
    def __init__(self, max_depth=3):
        self.max_depth = max_depth
        self.tree = None

    def fit(self, X, y):
        """Fit a decision tree to the data."""
        dataset = np.column_stack((X, y))
        self.tree = self._build_tree(dataset, depth=0)

    def _build_tree(self, dataset, depth):
        """Recursively build the decision tree."""
        if depth >= self.max_depth or len(dataset) <= 1:
            return np.mean(dataset[:, -1])  # Return mean of target as leaf
        
        # Find the best split
        best_split = self._get_best_split(dataset)
        if not best_split:
            return np.mean(dataset[:, -1])  # Return mean if no split improves the model
        
        left_tree = self._build_tree(best_split['left'], depth + 1)
        right_tree = self._build_tree(best_split['right'], depth + 1)
        return {'index': best_split['index'], 'value': best_split['value'], 'left': left_tree, 'right': right_tree}

    def _get_best_split(self, dataset):
        """Find the best split for the dataset."""
        best_split = None
        best_loss = float('inf')
        for index in range(dataset.shape[1] - 1):
            for value in dataset[:, index]:
                left, right = self._split(dataset, index, value)
                loss = self._calculate_loss(left, right)
                if loss < best_loss:
                    best_loss = loss
                    best_split = {'index': index, 'value': value, 'left': left, 'right': right}
        return best_split

    def _split(self, dataset, index, value):
        """Split dataset into left and right based on a feature and value."""
        left = dataset[dataset[:, index] < value]
        right = dataset[dataset[:, index] >= value]
        return left, right

    def _calculate_loss(self, left, right):
        """Calculate mean squared error loss for a split."""
        def mse(group):
            if len(group) == 0:
                return 0
            return np.mean((group[:, -1] - np.mean(group[:, -1]))**2)
        
        return mse(left) * len(left) + mse(right) * len(right)

    def predict(self, X):
        """Make predictions using the decision tree."""
        return np.array([self._predict_row(self.tree, row) for row in X])

    def _predict_row(self, node, row):
        """Predict a single row using the decision tree."""
        if isinstance(node, dict):
            if row[node['index']] < node['value']:
                return self._predict_row(node['left'], row)
            else:
                return self._predict_row(node['right'], row)
        return node

# Load your dataset
df = pd.read_csv("C:\\Users\\DELL8\\OneDrive\\Pictures\\train.csv", usecols=["Age", "Fare"])
df = df.dropna() 
df['Target'] = df['Fare']  
X = df[["Age"]].values  
y = df["Target"].values  


gbr = GradientBoostingRegressor(n_estimators=10, learning_rate=0.1, max_depth=3)
gbr.fit(X, y)


predictions = gbr.predict(X)




  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [16]:
# Output results
df["Predictions"] = predictions
print(df)

      Age     Fare   Target  Predictions
0    22.0   7.2500   7.2500    -3.750592
1    38.0  71.2833  71.2833     7.350557
2    26.0   7.9250   7.9250    -6.063680
3    35.0  53.1000  53.1000    35.573872
4    35.0   8.0500   8.0500    35.573872
..    ...      ...      ...          ...
885  39.0  29.1250  29.1250     3.814389
886  27.0  13.0000  13.0000    -5.671832
887  19.0  30.0000  30.0000    -5.627645
889  26.0  30.0000  30.0000    -6.063680
890  32.0   7.7500   7.7500    -5.671832

[714 rows x 4 columns]
