In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import linear_model
from sklearn import preprocessing
from sklearn import metrics

# Seed the global NumPy random number generator so the dataset is reproducible.

# 2. My student ID is 1564710, so I set the random seed to 10.
np.random.seed(10)

# Generate random data points.
# 7. There are 100 samples.
num_samples = 100
# 3. There are 2 features, each drawn uniformly from the interval [-10, 10).
feature_1 = np.random.uniform(-10, 10, num_samples)
feature_2 = np.random.uniform(-10, 10, num_samples)

# Generate the target variable from a polynomial relationship with added
# Gaussian noise (mean 0, standard deviation 5).
# The expression is wrapped in parentheses on purpose: without them, a
# continuation line that starts with `+` is parsed as a separate statement
# and the noise term is silently thrown away.
target = (
    0.5 * feature_1**2
    + 2 * feature_1 * feature_2
    - 3 * feature_2
    + 5
    + np.random.normal(0, 5, num_samples)
)

# Combine into a DataFrame.

# 4. Predicting y is a supervised learning problem
# because my dataset is a labeled dataset: I know the values of both the features
# ('Feature_1' and 'Feature_2') and the target variable ('Target').

data_regression = pd.DataFrame({
    'Feature_1': feature_1,
    'Feature_2': feature_2,
    'Target': target,
})
data_regression.head()


In [None]:
# With two features and one target, a 3D scatter plot shows the whole dataset:
# the features span the horizontal plane and the target is the height.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(1, 1, 1, projection='3d')

# One blue circular marker per sample.
ax.scatter(
    xs=data_regression['Feature_1'],
    ys=data_regression['Feature_2'],
    zs=data_regression['Target'],
    c='blue',
    marker='o',
)

# 5. The z-axis represents the label (the target variable 'Target').
ax.set(xlabel='Feature 1', ylabel='Feature 2', zlabel='Target')
ax.set_title('3D Visualization of Regression Data Points')
plt.show()

# 6. This is a regression problem because the goal is to predict
# a continuous numerical value ('Target'),
# rather than to classify inputs into discrete categories.




In [None]:
from sklearn.model_selection import train_test_split

# Separate the feature columns (X) from the target column (y).
X = data_regression.loc[:, ['Feature_1', 'Feature_2']]
y = data_regression.loc[:, 'Target']

# Hold out 20% of the rows for testing and train on the remaining 80%;
# the fixed random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the shapes of the resulting partitions as a sanity check.
print("Training set shape - X:", X_train.shape, " y:", y_train.shape)
print("Testing set shape - X:", X_test.shape, " y:", y_test.shape)