Array Creation:
--------------

numpy.array(): Create an array from a Python list or tuple.
    
numpy.arange(): Create an array with regularly spaced values.
    
numpy.linspace(): Create an array with evenly spaced values over a specified range.

numpy.zeros(): Create an array filled with zeros.

numpy.ones(): Create an array filled with ones.

numpy.eye(): Create an identity matrix.

numpy.random.rand(), numpy.random.randn(): Generate random numbers.

Array Manipulation:
------------------


numpy.reshape(): Reshape an array.

numpy.concatenate(), numpy.vstack(), numpy.hstack(): Concatenate arrays.

numpy.split(): Split an array into multiple sub-arrays.

numpy.transpose(), ndarray.T: Transpose an array (.T is an attribute of the array itself, e.g. a.T).

Mathematical Functions:
-----------------------


numpy.sum(), numpy.mean(), numpy.median(), numpy.std(): Basic statistical functions.


numpy.min(), numpy.max(): Minimum and maximum values.

numpy.absolute(), numpy.abs(): Absolute value.

numpy.sqrt(), numpy.exp(), numpy.log(): Square root, exponential, and logarithmic functions.

numpy.sin(), numpy.cos(), numpy.tan(): Trigonometric functions.

numpy.around(), numpy.floor(), numpy.ceil(): Rounding functions.

Linear Algebra:
--------------


numpy.dot(), numpy.matmul(): Matrix multiplication.

numpy.linalg.inv(): Inverse of a matrix.

numpy.linalg.det(): Determinant of a matrix.

numpy.linalg.eig(): Eigenvalues and eigenvectors of a square matrix.

numpy.linalg.solve(): Solve a system of linear equations.

Array Iteration:
----------------


numpy.nditer(): Efficient multi-dimensional iterator object.

Miscellaneous:
-------------

numpy.where(): Return elements chosen from two arrays based on a condition.

numpy.unique(): Find unique elements in an array.

numpy.argsort(): Return the indices that would sort an array.

numpy.argmax(), numpy.argmin(): Return the indices of the maximum and minimum values.

In [1]:
import numpy as np
import pandas as pd
from numpy import random

In [2]:
# Draw a 10x4 table of random integers from 1 to 100 (the upper bound 101 is exclusive).
data = np.random.randint(low=1, high=101, size=(10, 4))
data

array([[ 54,  33,  19,  95],
       [ 20,  77,  75,  54],
       [  2,  39,  90,  94],
       [100,  19,   4,  64],
       [ 81,  26,  55,  21],
       [ 47,  78,  46,  84],
       [ 34,  80,  56,  80],
       [ 46,  97,  34,  89],
       [ 97,  96,   8,  29],
       [ 89,  85,   6,  46]])

In [3]:
# Load the wine-clustering data set into a DataFrame and preview the first five rows.
# The path is a raw string so the Windows backslashes stay literal: "\D" and "\w"
# are not valid escape sequences and raise a SyntaxWarning on recent Python versions.
data1 = pd.read_csv(r"D:\Data Set\wine-clustering.csv")
data1.head(5)

Unnamed: 0,Alcohol,Malic_Acid,Ash,Ash_Alcanity,Magnesium,Total_Phenols,Flavanoids,Nonflavanoid_Phenols,Proanthocyanins,Color_Intensity,Hue,OD280,Proline
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [4]:
# Print a summary of the DataFrame: index range, column names, non-null counts, dtypes and memory usage.
data1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Alcohol               178 non-null    float64
 1   Malic_Acid            178 non-null    float64
 2   Ash                   178 non-null    float64
 3   Ash_Alcanity          178 non-null    float64
 4   Magnesium             178 non-null    int64  
 5   Total_Phenols         178 non-null    float64
 6   Flavanoids            178 non-null    float64
 7   Nonflavanoid_Phenols  178 non-null    float64
 8   Proanthocyanins       178 non-null    float64
 9   Color_Intensity       178 non-null    float64
 10  Hue                   178 non-null    float64
 11  OD280                 178 non-null    float64
 12  Proline               178 non-null    int64  
dtypes: float64(11), int64(2)
memory usage: 18.2 KB


In [5]:
# Count the missing values in every column (isna is the same check as isnull).
data1.isna().sum()

Alcohol                 0
Malic_Acid              0
Ash                     0
Ash_Alcanity            0
Magnesium               0
Total_Phenols           0
Flavanoids              0
Nonflavanoid_Phenols    0
Proanthocyanins         0
Color_Intensity         0
Hue                     0
OD280                   0
Proline                 0
dtype: int64

In [6]:
# Group the rows of data1 by their 'Alcohol' value.
# NOTE: despite its name, `df` holds a GroupBy object here, not a DataFrame.
df=data1.groupby('Alcohol')


In [7]:
# Mean 'Ash' value within each 'Alcohol' group.
# The aggregation is named by the string "mean" rather than np.mean: passing the
# NumPy callable to a groupby aggregate is deprecated and emits a FutureWarning.
df['Ash'].aggregate('mean')

Alcohol
11.03    2.20
11.41    2.50
11.45    2.42
11.46    1.82
11.56    3.23
         ... 
14.37    2.50
14.38    2.33
14.39    2.45
14.75    2.39
14.83    2.17
Name: Ash, Length: 126, dtype: float64

In [8]:
# Largest 'Ash' value within each 'Alcohol' group.
df['Ash'].max()

Alcohol
11.03    2.20
11.41    2.50
11.45    2.42
11.46    1.82
11.56    3.23
         ... 
14.37    2.50
14.38    2.38
14.39    2.45
14.75    2.39
14.83    2.17
Name: Ash, Length: 126, dtype: float64

In [9]:
# Show the dtype of every column in data1.
data1.dtypes

Alcohol                 float64
Malic_Acid              float64
Ash                     float64
Ash_Alcanity            float64
Magnesium                 int64
Total_Phenols           float64
Flavanoids              float64
Nonflavanoid_Phenols    float64
Proanthocyanins         float64
Color_Intensity         float64
Hue                     float64
OD280                   float64
Proline                   int64
dtype: object

In [10]:
# Keep only the columns whose label contains the substring "Ash".
filter_col = data1.filter(like="Ash", axis="columns")
filter_col

Unnamed: 0,Ash,Ash_Alcanity
0,2.43,15.6
1,2.14,11.2
2,2.67,18.6
3,2.50,16.8
4,2.87,21.0
...,...,...
173,2.45,20.5
174,2.48,23.0
175,2.26,20.0
176,2.37,20.0


In [11]:
# The three rows with the lowest 'Alcohol' values, in ascending order.
data1.nsmallest(3, "Alcohol")

Unnamed: 0,Alcohol,Malic_Acid,Ash,Ash_Alcanity,Magnesium,Total_Phenols,Flavanoids,Nonflavanoid_Phenols,Proanthocyanins,Color_Intensity,Hue,OD280,Proline
115,11.03,1.51,2.2,21.5,85,2.46,2.17,0.52,2.01,1.9,1.71,2.87,407
113,11.41,0.74,2.5,21.0,88,2.48,2.01,0.42,1.44,3.08,1.1,2.31,434
120,11.45,2.4,2.42,20.0,96,2.9,2.79,0.32,1.83,3.25,0.8,3.39,625


In [12]:
# (number of rows, number of columns) of data1.
data1.shape

(178, 13)

In [13]:
# Select a three-column subset of data1, in the listed order.
selected_columns = ["Alcohol", "Ash", "Malic_Acid"]
d = data1[selected_columns]
d

Unnamed: 0,Alcohol,Ash,Malic_Acid
0,14.23,2.43,1.71
1,13.20,2.14,1.78
2,13.16,2.67,2.36
3,14.37,2.50,1.95
4,13.24,2.87,2.59
...,...,...,...
173,13.71,2.45,5.65
174,13.40,2.48,3.91
175,13.27,2.26,4.28
176,13.17,2.37,2.59


In [14]:
# Five samples drawn uniformly from the half-open interval [0, 1).
uniform = np.random.uniform(0.0, 1.0, size=5)
uniform

array([0.81785648, 0.17689624, 0.99895797, 0.80587876, 0.62437358])

In [15]:
# 3x3 integer matrix holding 1..9 in row-major order.
a = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)
a

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [16]:
# 3x3 integer matrix holding 9..1 in row-major order.
b = np.array(
    [
        [9, 8, 7],
        [6, 5, 4],
        [3, 2, 1],
    ]
)
b

array([[9, 8, 7],
       [6, 5, 4],
       [3, 2, 1]])

In [17]:
# Join a and b side by side (column-wise), giving a 3x6 array.
c = np.concatenate((a, b), axis=1)
c

array([[1, 2, 3, 9, 8, 7],
       [4, 5, 6, 6, 5, 4],
       [7, 8, 9, 3, 2, 1]])

In [18]:
# Ten evenly spaced values from 1 to 5, both endpoints included.
arr = np.linspace(start=1, stop=5, num=10)
arr

array([1.        , 1.44444444, 1.88888889, 2.33333333, 2.77777778,
       3.22222222, 3.66666667, 4.11111111, 4.55555556, 5.        ])

In [19]:
# Matrix product of a and b (this rebinds `d`, replacing its previous value).
d = np.dot(a, b)
d

array([[ 30,  24,  18],
       [ 84,  69,  54],
       [138, 114,  90]])

In [20]:
# Matrix product of a and b, written as a function call instead of the @ operator.
f = np.matmul(a, b)
f

array([[ 30,  24,  18],
       [ 84,  69,  54],
       [138, 114,  90]])

In [21]:
# Odd integers from 1 up to (but not including) 10.
g = np.arange(1, 10, step=2)
g

array([1, 3, 5, 7, 9])

In [22]:
# Split a along its first axis into three equal pieces (one row each); the result is a list of arrays.
h = np.split(a, indices_or_sections=3, axis=0)
h

[array([[1, 2, 3]]), array([[4, 5, 6]]), array([[7, 8, 9]])]

Data Input/Output:
---------------------------------

pd.read_csv(): Read data from a CSV file into a DataFrame.

pd.read_excel(): Read data from an Excel file into a DataFrame.

pd.read_json(): Read data from a JSON file into a DataFrame.

pd.read_sql(): Read data from a SQL database into a DataFrame.

DataFrame.to_csv(): Write DataFrame to a CSV file.

DataFrame.to_excel(): Write DataFrame to an Excel file.

DataFrame.to_json(): Write DataFrame to a JSON file.

DataFrame.to_sql(): Write DataFrame to a SQL database.

Data Exploration:
-----------------

DataFrame.head(), DataFrame.tail(): View the first/last few rows of the DataFrame.

DataFrame.info(): Get information about the DataFrame, such as data types and null values.

DataFrame.describe(): Generate descriptive statistics summary of the DataFrame.

DataFrame.shape: Get the dimensions of the DataFrame (rows, columns).

DataFrame.columns: Get the column names of the DataFrame.

DataFrame.index: Get the row indices of the DataFrame.

Data Selection and Filtering:
---------------------------


DataFrame.loc[]: Access a group of rows and columns by labels.

DataFrame.iloc[]: Access a group of rows and columns by integer position.

DataFrame[...]: Select columns or filter rows based on conditions.

Data Manipulation:
-----------------


DataFrame.drop(): Drop specified labels from rows or columns.

DataFrame.rename(): Rename columns or indices.

DataFrame.sort_values(): Sort DataFrame by values along a specific axis.

DataFrame.groupby(): Group DataFrame using a mapper or by a Series of columns.

DataFrame.merge(): Merge DataFrame objects by performing a database-style join operation.

Missing Data Handling:
---------------------


DataFrame.dropna(): Drop missing values from DataFrame.

DataFrame.fillna(): Fill missing values in DataFrame with a specified value or method.

DataFrame.isna(), DataFrame.isnull(): Check for missing values.

Statistical and Mathematical Functions:
---------------------------------------


DataFrame.mean(), DataFrame.median(), DataFrame.sum(), DataFrame.std(): Calculate descriptive statistics.

DataFrame.apply(): Apply a function along an axis of the DataFrame.

DataFrame.min(), DataFrame.max(): Find minimum and maximum values.

Series.unique(), Series.value_counts(): Find the unique values in a column and their counts (unique() is a Series method; DataFrame.value_counts() counts unique rows).

Visualization:
-------------


DataFrame.plot(): Create various types of plots using matplotlib.

Data Preprocessing:
------------------
    

sklearn.preprocessing: Module for data preprocessing techniques such as scaling, encoding categorical variables, and imputation of missing values.

sklearn.model_selection.train_test_split(): Split dataset into training and testing sets.

Feature Extraction and Selection:
--------------------------------


sklearn.feature_extraction: Module for feature extraction techniques such as text vectorization (e.g., CountVectorizer, TfidfVectorizer).

sklearn.feature_selection: Module for feature selection techniques.

Model Selection and Evaluation:
-------------------------------



sklearn.model_selection.cross_val_score(): Evaluate a model using cross-validation.

sklearn.model_selection.GridSearchCV(): Search for the best hyperparameters using grid search and cross-validation.

sklearn.metrics: Module for various evaluation metrics such as accuracy, precision, recall, F1-score, ROC-AUC, etc.

Supervised Learning Algorithms:
------------------------------



sklearn.linear_model: Module for linear models such as Linear Regression, Logistic Regression, Ridge Regression, Lasso Regression, etc.

sklearn.tree: Module for decision tree-based algorithms such as DecisionTreeClassifier, DecisionTreeRegressor, etc.

sklearn.ensemble: Module for ensemble methods such as Random Forest, Gradient Boosting, AdaBoost, etc.

sklearn.svm: Module for Support Vector Machine algorithms.

sklearn.neighbors: Module for k-Nearest Neighbors algorithm.

Unsupervised Learning Algorithms:
----------------------------------



sklearn.cluster: Module for clustering algorithms such as KMeans, DBSCAN, Hierarchical clustering, etc.

sklearn.decomposition: Module for dimensionality reduction techniques such as PCA, NMF, LDA (LatentDirichletAllocation), etc.

Neural Network Models:
---------------------



sklearn.neural_network: Module for basic neural network models.

Pipeline and FeatureUnion:
-------------------------



sklearn.pipeline: Module for constructing pipelines to chain multiple estimators together.

sklearn.pipeline.FeatureUnion: Combine multiple feature extraction methods into a single transformer.

Model Persistence:
-----------------

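joblib (joblib.dump() / joblib.load()): Library for saving and loading scikit-learn models; import it directly, because sklearn.externals.joblib has been removed from scikit-learn.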

In [23]:
# 10x10 table of floats drawn uniformly from the half-open interval [1, 101).
new = np.random.uniform(low=1, high=101, size=(10, 10))
new

array([[55.31153653, 72.1990915 , 34.6533873 , 45.02073142, 98.06107857,
        19.69362594, 79.55638617, 15.85148335,  9.12007659, 11.87331879],
       [41.65519839, 94.59575942, 85.27183389,  3.72166711, 39.18221518,
        19.48210392, 47.92581027, 42.70552399, 74.31774869, 49.37934229],
       [51.61262958, 43.94437741, 32.03496935, 43.14837163, 79.38222382,
        13.59110797, 76.53294425,  2.71407477, 29.35570131,  5.56512607],
       [13.6171984 , 66.89676857,  9.38884709, 42.78693388, 64.44824788,
        44.71950234, 86.99538062, 33.67711847, 10.48061016, 98.81773521],
       [88.71977723, 17.93970847, 16.4460728 , 34.86088114, 54.07331665,
        57.9836095 , 42.86879434, 21.83687411, 77.55675895, 46.22494813],
       [48.07768638, 84.01482759, 22.32854244, 73.31478285,  2.32735283,
        74.21931306, 81.62136568, 72.12523924, 98.19273288, 82.53602154],
       [61.63938107, 87.68617686, 31.42910098, 53.55366069, 13.07903227,
        56.34990102, 77.87431144, 85.15055221

In [24]:
# Wrap the random array in a DataFrame with default integer labels (this rebinds `df`).
df = pd.DataFrame(data=new)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,55.311537,72.199092,34.653387,45.020731,98.061079,19.693626,79.556386,15.851483,9.120077,11.873319
1,41.655198,94.595759,85.271834,3.721667,39.182215,19.482104,47.92581,42.705524,74.317749,49.379342
2,51.61263,43.944377,32.034969,43.148372,79.382224,13.591108,76.532944,2.714075,29.355701,5.565126
3,13.617198,66.896769,9.388847,42.786934,64.448248,44.719502,86.995381,33.677118,10.48061,98.817735
4,88.719777,17.939708,16.446073,34.860881,54.073317,57.983609,42.868794,21.836874,77.556759,46.224948
5,48.077686,84.014828,22.328542,73.314783,2.327353,74.219313,81.621366,72.125239,98.192733,82.536022
6,61.639381,87.686177,31.429101,53.553661,13.079032,56.349901,77.874311,85.150552,68.475268,1.356342
7,53.907589,32.704531,71.756819,59.15328,57.574138,73.436083,85.103661,43.318807,18.635265,66.587202
8,38.863488,72.430353,61.270605,47.439916,57.393008,5.768101,28.736973,65.733154,2.270503,23.140386
9,88.110313,69.392092,3.724095,93.372022,70.256681,27.874403,72.553293,35.521098,41.028074,11.729357
