In [1]:
import numpy as np
from scipy.sparse import csr_matrix

# Step 1: Build a small 3x3 dense array in which most entries are zero
# (exactly one non-zero value per row).
dense_matrix = np.array([[0, 0, 1], [2, 0, 0], [0, 3, 0]])

print("Dense matrix:\n", dense_matrix)

Dense matrix:
 [[0 0 1]
 [2 0 0]
 [0 3 0]]


In [2]:
# Step 2: Convert dense matrix to sparse (CSR format)
# csr_matrix accepts the dense NumPy array directly; only the non-zero entries are stored.
sparse_matrix = csr_matrix(dense_matrix)
# Printing the sparse matrix lists each stored entry as "(row, col) value" instead of the full grid.
print("\nSparse matrix (CSR format):\n", sparse_matrix)


Sparse matrix (CSR format):
   (0, 2)	1
  (1, 0)	2
  (2, 1)	3


In [3]:
# Step 3: Inspect the three arrays that make up the CSR representation
csr_values = sparse_matrix.data        # the non-zero values, stored row by row
csr_columns = sparse_matrix.indices    # column index of each stored value
csr_row_ptr = sparse_matrix.indptr     # index pointer: row i occupies data[indptr[i]:indptr[i + 1]]

print("\nData:", csr_values)
print("Indices:", csr_columns)
print("Indptr:", csr_row_ptr)


Data: [1 2 3]
Indices: [2 0 1]
Indptr: [0 1 2 3]


In [4]:
# Step 4: Convert back to dense
# toarray() expands the sparse matrix into a full NumPy array, zeros included.
dense_from_sparse = sparse_matrix.toarray()
print("\nConverted back to dense:\n", dense_from_sparse)


Converted back to dense:
 [[0 0 1]
 [2 0 0]
 [0 3 0]]


In [5]:
# Step 5: Multiply the sparse matrix by a dense 1-D vector.
# The product is computed from the stored entries and comes back as a dense 1-D array.
vector = np.array([1, 2, 3])
result = sparse_matrix @ vector  # operator form of sparse_matrix.dot(vector)
print("\nMatrix-vector multiplication result:\n", result)


Matrix-vector multiplication result:
 [3 2 6]


In [6]:
# Step 6: Multiply the transposed sparse matrix by the same vector
# (.T is the attribute shorthand for .transpose()).
transpose_result = sparse_matrix.T @ vector
print("\nTranspose matrix-vector multiplication:\n", transpose_result)


Transpose matrix-vector multiplication:
 [4 9 1]


In [7]:
# Recap cell: dense <-> sparse conversion and sparse products in one place.
# All imports are grouped at the top of the cell so its dependencies are visible at a glance.
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import random

# Seed for the random sparse matrices below, so re-running the cell
# (or Restart & Run All) reproduces the same A, B and C.
SEED = 42
rng = np.random.default_rng(SEED)

# Create a sparse matrix from dense
dense = [[0, 0, 3], [4, 0, 0], [0, 5, 0]]
sparse = csr_matrix(dense)

# Convert back to dense
dense_back = sparse.toarray()

# Matrix-vector multiplication
x = np.array([1, 2, 3])
result = sparse @ x  # or sparse.dot(x)

# Matrix-matrix multiplication of two random 1000x1000 matrices at 1% density.
# Both draws share one Generator: passing the same integer seed to each call
# would make A and B identical.
A = random(1000, 1000, density=0.01, format='csr', random_state=rng)
B = random(1000, 1000, density=0.01, format='csr', random_state=rng)
C = A @ B


In [8]:
from pyspark.ml.linalg import Vectors

# Build a length-5 SparseVector from parallel lists: positions 1 and 3
# hold 1.0 and 7.0; every other position is implicitly zero.
nonzero_positions = [1, 3]
nonzero_values = [1.0, 7.0]
sparse_vec = Vectors.sparse(5, nonzero_positions, nonzero_values)
print("Sparse Vector:", sparse_vec)

# Expand to a dense NumPy array with the zeros filled in
dense_vec = sparse_vec.toArray()
print("Dense Vector:", dense_vec)


Sparse Vector: (5,[1,3],[1.0,7.0])
Dense Vector: [0. 1. 0. 7. 0.]


In [9]:
import numpy as np

# 3x3 dense array with a single non-zero value in each row
dense = np.array([[1, 0, 0], [0, 0, 2], [0, 3, 0]])


In [10]:
# Bare trailing expression: the notebook shows the array's repr as the cell output.
dense

array([[1, 0, 0],
       [0, 0, 2],
       [0, 3, 0]])

In [11]:
from scipy.sparse import csr_matrix

# Build the CSR matrix straight from a nested list; no intermediate NumPy array is needed.
sparse = csr_matrix([[1, 0, 0], [0, 0, 2], [0, 3, 0]])


In [12]:
# Bare trailing expression: the repr is a one-line summary (shape, dtype, number of stored elements, format), not the entries themselves.
sparse

<3x3 sparse matrix of type '<class 'numpy.int64'>'
	with 3 stored elements in Compressed Sparse Row format>

In [13]:
# Convert dense to sparse
# Rebinds `sparse`, this time building it from the NumPy array `dense` instead of a nested list.
sparse = csr_matrix(dense)

# Convert sparse to dense
# toarray() returns a full NumPy array with the zeros filled back in.
dense_back = sparse.toarray()


In [14]:
# Display the round-tripped array; it should match the original `dense` array.
dense_back

array([[1, 0, 0],
       [0, 0, 2],
       [0, 3, 0]])

In [17]:
# Print the installed XGBoost version so the results below can be tied to a specific release.
# NOTE(review): this cell's execution count is out of sequence with the cells around it;
# re-run with Restart Kernel -> Run All before sharing so the saved outputs match a clean run.
import xgboost
print(xgboost.__version__)

2.1.4


In [15]:
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from scipy.sparse import csr_matrix


# Load the breast-cancer binary classification dataset bundled with scikit-learn
data = load_breast_cancer()
X = data.data
y = data.target

# Wrap the feature matrix in CSR format. This is optional and only here
# to simulate feeding sparse input to the model.
X_sparse = csr_matrix(X)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X_sparse,
    y,
    test_size=0.2,
    random_state=42,
)



In [21]:
# Classifier settings collected in one place: logistic objective for the
# two-class target, with log-loss as the evaluation metric.
xgb_params = {
    "objective": "binary:logistic",
    "eval_metric": "logloss",
}
model = xgb.XGBClassifier(**xgb_params)

# Train on the CSR matrix as-is; no conversion back to dense is performed
model.fit(X_train, y_train)


In [22]:
# Predict class labels for the held-out test rows
y_pred = model.predict(X_test)
# Per-class precision / recall / F1 and overall accuracy, comparing predictions with the true labels
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

