In [54]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the CSV
data = pd.read_csv('Neural.csv')

# Keep only useful columns
data = data[['Features', 'Label']]

# Split into train (first 10000) and test (next 10000)
train_data = data.iloc[:10000]
test_data = data.iloc[10000:]

# Features and Labels
X = train_data['Features']
y = train_data['Label']

# Split into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Vectorize the text
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_val_vec = vectorizer.transform(X_val)
# Train the model
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train_vec, y_train)

# Validate the model
y_val_pred = model.predict(X_val_vec)
accuracy = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

# Predict for test data
X_test = test_data['Features']
X_test_vec = vectorizer.transform(X_test)
y_test_pred = model.predict(X_test_vec)

# Save the predictions
test_data['Predicted_Label'] = y_test_pred
test_data.to_csv('Predicted_Labels.csv', index=False)

print("Predictions saved successfully to 'Predicted_Labels.csv'")

Validation Accuracy: 86.95%
Predictions saved successfully to 'Predicted_Labels.csv'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data['Predicted_Label'] = y_test_pred


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression

# Step 1: Create example data
# Each row is a sample: [feature1, feature2]
X = np.array([
    [1, 2],
    [2, 3],
    [3, 3],
    [4, 5],
    [6, 7],   # Class 0 (blue)
    [7, 8],
    [8, 8],
    [9, 10],
    [10, 12], # Class 1 (red)
    [11, 13]
])

# Labels: 0 = Not Spam, 1 = Spam
y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

# Step 2: Train the Logistic Regression model
model = LogisticRegression()
model.fit(X, y)

# Step 3: Predict using the trained model
predictions = model.predict(X)

# Step 4: Visualize the decision boundary
# Create a mesh grid for background
x_min, x_max = X[:, 0].min()-1, X[:, 0].max()+1
y_min, y_max = X[:, 1].min()-1, X[:, 1].max()+1
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 1),
                     np.linspace(y_min, y_max, 1))
grid = np.c_[xx.ravel(), yy.ravel()]
probs = model.predict(grid).reshape(xx.shape)

# Plot
plt.contourf(xx, yy, probs, alpha=0.3, cmap='coolwarm')
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolors='k')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Logistic Regression Decision Boundary')
plt.show()


: 

In [44]:
import numpy as np
X = np.array([
    [1, 2],
    [2, 3],
    [3, 3],
    [4, 5],
    [6, 7],   # Class 0 (blue)
    [7, 8],
    [8, 8],
    [9, 10],
    [10, 12], # Class 1 (red)
    [11, 13]
])
x_min, x_max = X[:, 0].min()-1, X[:, 0].max()+1
y_min, y_max = X[:, 1].min()-1, X[:, 1].max()+1
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 3),
                     np.linspace(y_min, y_max, 3))
grid = np.c_[xx.ravel(), yy.ravel()]
probs = model.predict(grid).reshape(xx.shape)
print(probs)  # Check the shape of the probabilities
#print(grid)  # Check the shape of the grid
# print(yy)  # Check the shape of the mesh grid
# print(xx)  # Check the shape of the mesh grid

[[0 0 1]
 [0 0 1]
 [0 1 1]]


In [57]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

data = pd.read_csv('Neural.csv')
data = data[['Features', 'Label']]
train_data = data.iloc[:10000]
test_data = data.iloc[10000:]
X = train_data['Features']
y = train_data['Label']
x_train, x_test, y_train, y_test = train_test_split(X,y,test_size = 0.2, random_state = 42)
vectorizer = TfidfVectorizer(max_features = 5000)
x_train_vec = vectorizer.fit_transform(x_train)
x_test_vec = vectorizer.transform(x_test)

model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(x_train_vec, y_train)
y_test_pred = model.predict(x_test_vec)
accuracy = accuracy_score(y_test, y_test_pred)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

Validation Accuracy: 86.95%
