In [None]:
import pandas as pd
import plotly.express as px
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the dataset
df = px.data.iris()

# Define features and target
X = df[['sepal_width']]  
y = df['sepal_length']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Print model parameters
print(f"Intercept: {model.intercept_}")
print(f"Coefficient (Slope) for sepal_width: {model.coef_}")

# Predict on the test set
y_pred = model.predict(X_test)

# Calculate Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Plot the results
plt.scatter(X_test, y_test, color='blue', label='Actual data')
plt.plot(X_test, y_pred, color='red', label='Fitted line')
plt.title('Bivariate Regression: Sepal Width vs Sepal Length')
plt.xlabel('Sepal Width')
plt.ylabel('Sepal Length')
plt.legend()
plt.show()

In [None]:
df=pd.read_csv("cric.csv")
print(df[df['Runs'] > 10000])

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import plotly.express as px

df = pd.read_csv("cric.csv")
df = df.drop(columns=['Unnamed: 0', 'Player', 'Span'])
df = df.apply(pd.to_numeric, errors='coerce')
df = df.dropna()

model = LinearRegression()

x = df[['Ave']]
y = df['Runs']

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=50)
model.fit(x_train, y_train)

print(f'Intercept={model.intercept_}')
print(f'Coefficient={model.coef_}')

y_pred = model.predict(x_test)
print(f'Mean squared error={mean_squared_error(y_test, y_pred)}')
plt.xlabel("Actual Runs")
plt.ylabel("Predicted Runs")
plt.scatter(y_test, y_pred)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, confusion_matrix
from sklearn.linear_model import LogisticRegression

df = pd.read_csv("cric.csv")
df = df.drop(columns=['Unnamed: 0', 'Player', 'Span'])
df = df.apply(pd.to_numeric, errors='coerce')
df = df.dropna()

median = df['Runs'].median()
df['Runs-Category'] = (df['Runs'] > median).astype(int)

x = df[['Ave']]
y = df['Runs-Category']

model = LogisticRegression()
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=50)
model.fit(x_train, y_train)

print(f'intercept={model.intercept_}')
print(f'coefficient={model.coef_}')
y_pred = model.predict(x_test)

acc = accuracy_score(y_test, y_pred)
mat = confusion_matrix(y_test, y_pred)
print(f'accuracy={acc}')
print(f'confusion matrix={mat}')

# plt.figure(figsize=(7,9))
# plt.scatter(y_pred,y_test)
sns.heatmap(mat,annot=True)
plt.xlabel('Predicted')
plt.ylabel('Actual')
# plt.show()

In [None]:
import pandas as pd
import plotly.express as px
df = px.data.iris()
# print(df)
df['Petal-more-than-1.0'] = (df['petal_width'] > 1.0).astype(int)
print(pd.crosstab(df['petal_length'], df['Petal-more-than-1.0']))


In [None]:
import pandas as pd
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import plotly.io as pio
pio.renderers.default = "browser"  

# Load the Iris dataset
df = px.data.iris()

# Selecting only numerical features for PCA
features = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
X = df[features]

# Standardizing the data (important for PCA)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply PCA (reduce to 2 components for visualization)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# Create a new DataFrame with PCA results
df_pca = pd.DataFrame(data=X_pca, columns=['PC1', 'PC2'])
df_pca['species'] = df['species']  # Keep species for visualization

# Visualizing the PCA results
fig = px.scatter(df_pca, x='PC1', y='PC2', color='species', 
                 title='PCA of Iris Dataset',
                 labels={'PC1': 'Principal Component 1', 'PC2': 'Principal Component 2'})
fig.show()


In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.io as pio
pio.renderers.default = 'browser'

df = px.data.iris()
cols = ['sepal_width','sepal_length','petal_length','petal_width']
new_df = df[cols]

scaler = StandardScaler()
x = scaler.fit_transform(new_df)

pc = PCA(n_components=2)
nxt = pc.fit_transform(x)

df1 = pd.DataFrame(data=nxt,columns=['PC1','PC2'])
df1['species'] = df['species']

fig=px.scatter(df1, x='PC1',y='PC2',color='species')
fig.show()
print(f'variance ratio={pc.explained_variance_ratio_}')
print(f'variance={pc.explained_variance_}')
# plt.scatter(df1['PC1'],df1['PC2'])
# plt.xlabel('PC1')
# plt.ylabel('PC2')
# plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

df = pd.read_csv("cric.csv")

df = df.drop(columns=['Unnamed: 0', 'Player', 'Span'])

df = df.apply(pd.to_numeric, errors='coerce')

df = df.dropna()

X = df[['Ave', 'SR']] 
y = df['Runs'] 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)

print(f"Intercept: {model.intercept_}")
print(f"Coefficients: {model.coef_}")

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R² Score: {r2}")

plt.figure(figsize=(8,6))
sns.scatterplot(x=y_test, y=y_pred, alpha=0.4, color="blue")
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color="red", linestyle="--")
plt.xlabel("Actual Runs")
plt.ylabel("Predicted Runs")
plt.title("Actual vs Predicted Runs")
plt.show()


In [None]:
import pandas as pd
data = {
    "Player": ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"],
    "Runs": [8500, 4000, 6700, 2300, 5500, 9200, 1200, 6100, 7400, 3000],
    "Average": [45.2, 38.0, 41.8, 29.5, 42.7, 48.3, 27.4, 39.9, 44.1, 34.5],
    "Strike Rate": [82.5, 75.3, 79.2, 72.1, 86.4, 90.1, 68.5, 78.8, 81.2, 76.7]
}
df = pd.DataFrame(data)

high_runs = df["Runs"] >= 5000
high_avg = df["Average"] >= 40
high_sr = df["Strike Rate"] >= 80

# p_high_avg = high_avg.mean() 
# p_high_runs = high_runs.mean()  

p_high_runs_given_avg = df[high_runs & high_avg].shape[0] / df[high_avg].shape[0]
p_high_sr_given_runs = df[high_sr & high_runs].shape[0] / df[high_runs].shape[0]

print(f"P(High Runs | High Average) = {p_high_runs_given_avg:.4f}")
print(f"P(High Strike Rate | High Runs) = {p_high_sr_given_runs:.4f}")


In [None]:
import pandas as pd
import plotly.express as px
df = px.data.iris()

# larger length given larger width
petal_width=df['petal_width'] > 1
petal_length=df['petal_length'] > 4

width_mean=petal_width.mean()
length_mean=petal_length.mean()

res=df[petal_length & petal_width].shape[0] / df[petal_width].shape[0]
print(f'{res:.4f}')

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score , classification_report , confusion_matrix

df = pd.read_csv("diabetes.csv")

x = df.drop(columns = ["Outcome"])
y = df["Outcome"]

scaler = StandardScaler()
xScaled = scaler.fit_transform(x)

xTrain , xTest , yTrain , yTest = train_test_split(xScaled , y , test_size = 0.2 , random_state= 42)

model = SVC(kernel = "rbf" , C =1.0 , gamma = "scale")
model.fit(xTrain , yTrain)

yPred = model.predict(xTest)

accuracy = accuracy_score(yTest , yPred)
classR = classification_report(yTest , yPred)
cm = confusion_matrix(yTest , yPred)

print("Accuracy is: " , accuracy)
print("\n Classification report \n :" , classR)

plt.figure(figsize = (6 , 4))
sns.heatmap(cm , cmap = "coolwarm" , fmt = ".3f" , annot=True ,  xticklabels=["No Diabetes", "Diabetes"], yticklabels=["NoDiabetes", "Diabetes"])
plt.title("Confusion Matrix - SVM on Diabetes Dataset")
plt.show()

In [None]:
import pandas as pd
import plotly.express as px

df = px.data.iris()
# df.drop(columns=['Id'], inplace=True)

fig = px.scatter_3d(df, x="sepal_length", y="sepal_width", z="petal_length", color="species",
                     title="3D Scatter Plot - Plotly")
fig.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

df = px.data.iris()
# df.drop(columns=['Id'], inplace=True)

for species in df["species"].unique():
    subset = df[df["species"] == species]
    plt.scatter(subset["sepal_length"], subset["sepal_width"], label=species)

plt.xlabel("Sepal Length (cm)")
plt.ylabel("Sepal Width (cm)")
plt.title("Sepal Length vs Sepal Width")
plt.legend()
plt.show()


In [None]:
import numpy as np
data = np.random.rand(10, 10)
sns.heatmap(data, annot=True, cmap="coolwarm")
plt.title("Heatmap Example")
plt.show()



In [None]:
import pandas as pd
url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"
# cols = ['pregnancies','glucose','bloodpressure','skinthickness','insulin','bmi','diabetespedigreefunction','age','outcome']
df = pd.read_csv(url,indexFalse)
print(df.head())

In [9]:

df.cov(numeric_only=True)
# df.corr(numeric_only=True)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species_id
sepal_length,0.685694,-0.039268,1.273682,0.516904,0.530872
sepal_width,-0.039268,0.188004,-0.321713,-0.117981,-0.148993
petal_length,1.273682,-0.321713,3.113179,1.296387,1.371812
petal_width,0.516904,-0.117981,1.296387,0.582414,0.597987
species_id,0.530872,-0.148993,1.371812,0.597987,0.671141
