## Perform different data visualizations in 2D and 3D

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset used for the 2D and 3D plots below.
df = pd.read_csv('2d_3d_visualization_dataset.csv')

# Display the first few rows
print(df.head())
# Display summary statistics of the dataset
print(df.describe())

# 2D Visualizations
# Scatter plots for each pair of features, drawn side by side in ONE figure
# (a separate plt.figure() per subplot would leave two empty slots in each).
feature_pairs = [
    ('Feature1', 'Feature2'),
    ('Feature1', 'Feature3'),
    ('Feature2', 'Feature3'),
]
fig_2d, pair_axes = plt.subplots(1, len(feature_pairs), figsize=(15, 5))
for pair_ax, (x_col, y_col) in zip(pair_axes, feature_pairs):
    sns.scatterplot(x=x_col, y=y_col, data=df, ax=pair_ax)
    pair_ax.set_title(f'{x_col} vs. {y_col}')
fig_2d.tight_layout()
plt.show()

# Correlation matrix heatmap.
# Restrict to numeric columns so a stray text column cannot break corr().
numeric_df = df.select_dtypes(include='number')
plt.figure(figsize=(6, 5))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Matrix')
plt.show()

# 3D Visualization
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(df['Feature1'], df['Feature2'], df['Feature3'])
ax.set_xlabel('Feature1')
ax.set_ylabel('Feature2')
ax.set_zlabel('Feature3')
ax.set_title('3D Scatter Plot of Features')
# No legend: none of the plotted artists carries a label, so plt.legend()
# would only emit a "no artists with labels" warning.
# To save the figure to disk: fig.savefig('3d_scatter_plot.png')
plt.show()

## Perform Regression over the dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression

# Load the regression dataset; it is expected to provide columns 'X' and 'y'.
df2 = pd.read_csv('simple_linear_regression_dataset.csv')

# Preview the data (printed explicitly: a bare expression in the middle of a
# cell or script produces no output).
print(df2.head())

X = df2[['X']]  # independent variable, kept 2-D as scikit-learn requires
y = df2['y']    # dependent variable

# Fit an ordinary least-squares line y = intercept + coefficient * X.
model = LinearRegression()
model.fit(X, y)

intercept = model.intercept_
coefficient = model.coef_[0]

print(f"\nModel Intercept: {intercept:.2f}")
print(f"Model Coefficient (slope): {coefficient:.2f}")

# Make predictions on the training inputs to draw the fitted line
y_pred = model.predict(X)

# Visualize the results
plt.figure(figsize=(10, 6))
plt.scatter(X, y, label='Actual Data')
plt.plot(X, y_pred, color='red', linewidth=2, label='Regression Line')
plt.title('Simple Linear Regression')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.grid(True)
plt.show()

## Perform data collection from an online source, a local drive, and a .csv file.

In [None]:
import pandas as pd
import requests
import matplotlib.pyplot as plt
import numpy as np

# Online source: fetch JSON records over HTTP and load them into a DataFrame.
url = "https://jsonplaceholder.typicode.com/posts"
# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=10)
# Fail loudly on HTTP errors instead of parsing an error page as data.
response.raise_for_status()
data_online = pd.DataFrame(response.json())
print(data_online.head())

# Local drive: load a .csv file stored next to the notebook.
data_csv = pd.read_csv("candy-data.csv")
print(data_csv.head())

## Q4. Perform Classification of dataset.

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# Load the iris dataset: feature matrix X and integer class labels y.
iris = load_iris()
X = iris.data
y = iris.target
feature_names = iris.feature_names

# Tabular view of the data for a quick look.
df = pd.DataFrame(X, columns=feature_names)
df['target'] = y
print(df.head())

# Split BEFORE scaling so the test set stays unseen during preprocessing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training data only, then apply the same transform to
# the test data. Fitting on the full dataset would leak test-set statistics
# (mean / variance) into training and bias the evaluation.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Multiclass logistic regression; max_iter raised to give the solver room
# to converge.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Evaluate on the held-out test set.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=iris.target_names))


## Q5. Perform decision tree operation over the dataset.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

dataset = pd.read_csv('play_tennis.csv')

# Columns are addressed by name rather than by position, so the features and
# the target stay correct even if the CSV carries extra columns (e.g. a row
# identifier) or a different column order.
feature_cols = ['outlook', 'temp', 'humidity', 'wind']
target_col = 'play'

# Integer-encode every categorical column. The encoder is re-fitted per
# column; after the loop it holds the mapping of the target column.
Le = LabelEncoder()
for col in feature_cols + [target_col]:
    dataset[col] = Le.fit_transform(dataset[col])

x1 = dataset[feature_cols].values
y1 = dataset[target_col].values

from sklearn import tree

# Entropy criterion: splits are chosen by information gain.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(x1, y1)

# Label the nodes so the plotted tree is readable.
tree.plot_tree(
    clf,
    feature_names=feature_cols,
    class_names=[str(c) for c in Le.classes_],
)

# Sanity check on the TRAINING data (not a generalisation estimate):
# element-wise agreement between predictions and true labels.
x_predict = clf.predict(x1)
print(x_predict == y1)
print("Training accuracy:", (x_predict == y1).mean())

## Q6. Implement the classical golf case: decide whether or not to play golf.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression

# Metrics used in the evaluation step; imported here so this cell does not
# depend on another cell having been executed first.
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

dataset2 = pd.read_csv('play_tennis.csv')

feature_cols = ['outlook', 'temp', 'humidity', 'wind']
target_col = 'play'

# Integer-encode the categorical columns of THIS cell's frame (dataset2),
# not of a frame left over from a different cell.
Le = LabelEncoder()
for col in feature_cols + [target_col]:
    dataset2[col] = Le.fit_transform(dataset2[col])

# Use exactly the encoded columns as features, and 'play' as the target.
X2 = dataset2[feature_cols]
y2 = dataset2[target_col]

# Splitting the dataset
X1_train, X1_test, y1_train, y1_test = train_test_split(X2, y2, test_size=0.2, random_state=42)

# Model building
model = LogisticRegression()
model.fit(X1_train, y1_train)

# Predict on test set
y_pred = model.predict(X1_test)

# Model evaluation
print("Accuracy:", accuracy_score(y1_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y1_test, y_pred))
print("Classification Report:\n", classification_report(y1_test, y_pred))

## Q7. Create a small bull-or-bear stock market analysis for a stock listed on the NSE and BSE.

In [None]:
import yfinance as yfp
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np


# Download stock data (NSE - RELIANCE).
# The library is imported under the alias `yfp`, so it must be called by
# that name (`yf` is undefined and raised NameError).
data = yfp.download("RELIANCE.NS", start="2024-01-01", end="2024-12-31")

if data.empty:
    raise RuntimeError("No price data returned for RELIANCE.NS")

# NOTE(review): some yfinance versions return two-level columns
# (field, ticker) even for a single ticker - confirm against the installed
# version. Flattening to the field level makes data['Close'] a plain Series.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# Find daily return (percentage change of the close versus the previous row)
data['Return'] = data['Close'].pct_change()

# Label Bull/Bear. The first row has no previous close, so its return is NaN;
# mark it 'N/A' instead of letting `NaN > 0 == False` label it 'Bear'.
data['Trend'] = np.where(
    data['Return'].isna(),
    'N/A',
    np.where(data['Return'] > 0, 'Bull', 'Bear'),
)

# Show last few rows
print(data[['Close', 'Return', 'Trend']].tail())

# Plot price
data['Close'].plot(title="Reliance Stock Price (Bull/Bear)", figsize=(10,5))
plt.show()

## Q8. Perform data cleaning operations on the collected data.

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer


# Build a DataFrame from the breast-cancer dataset: one column per feature
# plus a 'target' column holding the class label.
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target
print("First 5 rows:\n", df.head())

# Step 1: report missing values per column.
print("\nMissing values:\n", df.isnull().sum())

# Step 2: report and remove duplicate rows. The explicit copy makes the
# result an independent frame so the column assignments below cannot trigger
# SettingWithCopyWarning.
print("\nNumber of duplicate rows:", df.duplicated().sum())
df = df.drop_duplicates().copy()

# Step 3: limit outliers by clipping every numeric FEATURE to its 1st-99th
# percentile range. The 'target' column is a class label, not a measurement,
# so it is deliberately left untouched.
numeric_features = df.select_dtypes(include=[np.number]).columns.drop('target')
bounds = df[numeric_features].quantile([0.01, 0.99])
df[numeric_features] = df[numeric_features].clip(
    lower=bounds.loc[0.01], upper=bounds.loc[0.99], axis=1
)

# Step 4: normalise column names (trim, lower-case, spaces -> underscores).
df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")

print("\nSummary Statistics:\n", df.describe())

print("\nCleaned dataset shape:", df.shape)
