In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

import plotly.graph_objects as go

In [32]:
# Define the input and output data
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 5, 4, 5])

# Define the range of weight values to plot
w_range = np.linspace(-10, 12, 100)

# Define the MSE cost function
def mse_weight(w):
    y_pred = w * x
    return np.mean((y_pred - y) ** 2) / 2

# Compute the cost function values for each weight value
costs = np.array([mse_weight(w) for w in w_range])

# Create the trace for the cost function plot
trace = go.Scatter(x=w_range, y=costs, mode='lines', name='MSE')

# Create the layout for the plot
layout = go.Layout(title='MSE Cost Function', xaxis_title='Weight (w)', yaxis_title='Cost', template='plotly_dark')

# Create the figure and add the trace and layout
fig = go.Figure(data=[trace], layout=layout)

# Show the figure
fig.show()

In [33]:
# Define the function to optimize (MSE)
def mse(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2) / 2

# Define the gradient of the function
def gradient_mse(y_true, y_pred, x):
    return np.mean((y_pred - y_true) * x), np.mean(y_pred - y_true)

# Define the input and output data
x = np.array([1, 2, 3, 4, 5])
y = np.array([2, 4, 5, 4, 5])

# Define the range of weight values to plot
w_range = np.linspace(-10, 12, 100)

# Define the learning rate
learning_rate = 0.01

# Define the initial weights
w0 = -7

# Initialize the trace for the cost function plot
cost_trace = go.Scatter(x=[w0], y=[mse(y, w0*x)], mode='markers', marker=dict(size=10))

# Perform gradient descent for a fixed number of steps
for i in range(50):
    # Compute the predicted values using the current weights
    y_pred = w0 * x
    
    # Compute the gradient of the cost function
    grad_w = gradient_mse(y, y_pred, x)
    
    # Update the weights using gradient descent
    w0 -= learning_rate * grad_w[0]
    
    # Add the new weights and cost to the cost trace
    cost_trace['x'] = list(cost_trace['x']) + [w0]
    cost_trace['y'] = list(cost_trace['y']) + [mse(y, w0*x)]
    
# Compute the cost function values for each weight value
costs = np.array([mse(y, w*x) for w in w_range])

# Create the trace for the cost function plot
trace = go.Scatter(x=w_range, y=costs, mode='lines', name='MSE')

# Initialize the layout for the plot
layout = go.Layout(title='Cost Function',
                    xaxis_title='Weight (w)',
                    yaxis_title='MSE',
                    template='plotly_dark')

# Create the figure and add the traces
fig = go.Figure(data=[cost_trace, trace], layout=layout)

# Show the figure
fig.show()

In [34]:
# Define the function to optimize
def f(x, y):
    return np.sin(x) + np.cos(y)

# Define the gradient of the function
def gradient(x, y):
    return np.array([np.cos(x), -np.sin(y)])

# Define the learning rate
learning_rate = 0.6

# Define the initial point
x0, y0 = np.random.randn(), np.random.randn()
# Initialize the trace for the surface plot
surface = go.Surface(x=np.linspace(-5, 5, 100),
                     y=np.linspace(-5, 5, 100),
                     z=f(*np.meshgrid(np.linspace(-5, 5, 100), np.linspace(-5, 5, 100))),
                     colorscale='Viridis',
                     opacity=0.8)

# Initialize the trace for the gradient descent path
path = go.Scatter3d(x=[x0], y=[y0], z=[f(x0, y0)], mode='markers', marker=dict(size=5, color='red'))

# Perform gradient descent for a fixed number of steps
for i in range(50):
    # Compute the gradient at the current point
    grad = gradient(x0, y0)
    
    # Update the point using gradient descent
    x0 -= learning_rate * grad[0]
    y0 -= learning_rate * grad[1]
    
    # Convert the x, y, and z attributes from tuples to lists before adding new points
    path['x'] = list(path['x']) + [x0]
    path['y'] = list(path['y']) + [y0]
    path['z'] = list(path['z']) + [f(x0, y0)]

# Initialize the layout for the plot
layout = go.Layout(title='Gradient Descent',
                   scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'))

# Create the figure and add the traces
fig = go.Figure(data=[surface, path], layout=layout)

# Show the figure
fig.show()

In [3]:
data = pd.read_csv('realest.csv')
data.head()

Unnamed: 0,Price,Bedroom,Space,Room,Lot,Tax,Bathroom,Garage,Condition
0,53.0,2.0,967.0,5.0,39.0,652.0,1.5,0.0,0.0
1,55.0,2.0,815.0,5.0,33.0,1000.0,1.0,2.0,1.0
2,56.0,3.0,900.0,5.0,35.0,897.0,1.5,1.0,0.0
3,58.0,3.0,1007.0,6.0,24.0,964.0,1.5,2.0,0.0
4,64.0,3.0,1100.0,7.0,50.0,1099.0,1.5,1.5,0.0


In [10]:
data.isnull().sum()

Price        0
Bedroom      0
Space        0
Room         0
Lot          0
Tax          0
Bathroom     0
Garage       0
Condition    0
dtype: int64

In [8]:
data.duplicated().sum()

0

In [9]:
data.dropna(inplace=True)
# data.drop_duplicates(inplace=True)

In [18]:
y = data['Price']
X = data['Space'].values.reshape(-1, 1)
x_train, x_cv, y_train, y_cv = train_test_split(X, y)

In [19]:
model = LinearRegression()
model.fit(x_train, y_train)

LinearRegression()

In [20]:
predictions = model.predict(x_cv)

In [21]:
predictions

array([52.07610795, 59.3625987 , 46.70208196, 78.4298724 , 55.48029523,
       49.26478474, 77.11027171, 62.02092472, 46.77858055, 48.51892348,
       79.13748436, 78.4298724 , 51.96136006, 56.53215085, 53.31921004,
       59.17135222, 55.53766917, 58.1194966 , 52.74547061, 54.94480509,
       49.80027488, 57.12501492, 53.64432905, 49.53252981, 54.84918186,
       59.49647123, 55.11692692, 58.61673744, 46.74033125, 46.93157773,
       49.28390939, 54.54318749])

In [22]:
mse = np.mean((predictions - y_cv) ** 2) / 2
mse

38.51623730598061

In [23]:
y = data['Price']
X = data.drop(['Price'], axis=1)
x_train, x_cv, y_train, y_cv = train_test_split(X, y)

In [24]:
model = LinearRegression()
model.fit(x_train, y_train)

LinearRegression()

In [25]:
predictions = model.predict(x_cv)

In [26]:
predictions[:5]

array([52.83938989, 54.77183263, 80.92876751, 59.00685931, 59.81138943])

In [27]:
y_cv[:5]

23     46.0
2      56.0
33     70.0
40     55.0
103    65.0
Name: Price, dtype: float64

In [28]:
mse = np.mean((predictions - y_cv) ** 2) / 2
mse

24.48391706190393

In [2]:
data = pd.read_csv('realest.csv')
data.head()

Unnamed: 0,Price,Bedroom,Space,Room,Lot,Tax,Bathroom,Garage,Condition
0,53.0,2.0,967.0,5.0,39.0,652.0,1.5,0.0,0.0
1,55.0,2.0,815.0,5.0,33.0,1000.0,1.0,2.0,1.0
2,56.0,3.0,900.0,5.0,35.0,897.0,1.5,1.0,0.0
3,58.0,3.0,1007.0,6.0,24.0,964.0,1.5,2.0,0.0
4,64.0,3.0,1100.0,7.0,50.0,1099.0,1.5,1.5,0.0


In [4]:
data.fillna(data.mean(), inplace=True)

In [6]:
y = data['Price']
X = data.drop(['Price'], axis=1)
x_train, x_cv, y_train, y_cv = train_test_split(X, y)

In [7]:
model = LinearRegression()
model.fit(x_train, y_train)

LinearRegression()

In [8]:
predictions = model.predict(x_cv)
predictions[:5], y_cv[:5]

(array([57.36507552, 55.85692365, 58.65544361, 57.13071844, 77.41520116]),
 49    45.0
 32    49.0
 1     55.0
 75    43.0
 9     82.0
 Name: Price, dtype: float64)

In [9]:
mse = np.mean((predictions - y_cv)**2) / 2
mse

22.18885583967233