In [1]:
## Panda Pandas is a powerful and open-source Python library designed for data manipulation 
# and analysis. It is well-suited for working with tabular data, such as spreadsheets etc

In [2]:
import numpy as np
import pandas as pd

In [3]:
 # SERIES   

In [4]:
s = pd.Series([1, 3, 5, np.nan, 6, 8]) 
# A one-dimensional labeled array capable of holding data of any type. good for one column.
# like (integer, string, float, Python objects, etc.). 
# It is similar to a column in an Excel sheet
print(s)


0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64


In [5]:
s.values

array([ 1.,  3.,  5., nan,  6.,  8.])

In [None]:
s.index 

RangeIndex(start=0, stop=6, step=1)

In [56]:
s2 = pd.Series([4,6,8,10,12])

In [57]:
s2.index = ['a','b','c','d','e']

In [59]:
s2

a     4
b     6
c     8
d    10
e    12
dtype: int64

In [6]:
## DATAFRAME

In [7]:
dic1 = {
    "name":['Gauurav','Rohan','Jeetu','Bittu'],
    "marks":[47,67,83,91],
    "city":['jamshedpur','bithoor','pulgaon','jagdalpur']
}

In [8]:
data = pd.DataFrame(dic1) #convert dictionary into an excelsheet i.e csv file 
# A two-dimensional data structure with labeled axes (rows and columns),
# similar to a table in a database or an Excelsheet

In [9]:
data


Unnamed: 0,name,marks,city
0,Gauurav,47,jamshedpur
1,Rohan,67,bithoor
2,Jeetu,83,pulgaon
3,Bittu,91,jagdalpur


In [10]:
data.to_csv('bhos.csv') #after analysing data, you want to put on excelsheet then use to_csv

In [11]:
data.to_csv('bhos1.csv',index = False)

In [12]:
data.head(1)

Unnamed: 0,name,marks,city
0,Gauurav,47,jamshedpur


In [13]:
data.tail(1)

Unnamed: 0,name,marks,city
3,Bittu,91,jagdalpur


In [14]:
data.describe() # gives you statistics only for numerical columns.

Unnamed: 0,marks
count,4.0
mean,72.0
std,19.42507
min,47.0
25%,62.0
50%,75.0
75%,85.0
max,91.0


In [15]:
myData = pd.read_csv('byself.csv')

In [16]:
myData 

Unnamed: 0.1,Unnamed: 0,Train,Speed,City
0,1,12344,54,Kolkata
1,2,14789,76,shimla
2,3,23456,33,Local
3,4,14563,96,bharat


In [17]:
myData['City'][1]

'shimla'

In [18]:
myData['City'][1] = 'Durg'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  myData['City'][1] = 'Durg'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  myData['City'][1] = 'Durg'


In [19]:
myData.to_csv('byself1.csv',index=False)

In [20]:
readingData = pd.read_csv('byself1.csv')

In [21]:
readingData

Unnamed: 0.1,Unnamed: 0,Train,Speed,City
0,1,12344,54,Kolkata
1,2,14789,76,Durg
2,3,23456,33,Local
3,4,14563,96,bharat


In [22]:
readingData.index = ['first','second','third','fourth']

In [23]:
readingData

Unnamed: 0.1,Unnamed: 0,Train,Speed,City
first,1,12344,54,Kolkata
second,2,14789,76,Durg
third,3,23456,33,Local
fourth,4,14563,96,bharat


In [24]:
readingData.shape # Tells you no. of rows & colums.

(4, 4)

In [25]:
k = readingData.columns # Tells you no of colums and dtype.
print('\n')
print(k,type(k))



Index(['Unnamed: 0', 'Train', 'Speed', 'City'], dtype='object') <class 'pandas.core.indexes.base.Index'>


In [26]:
readingData.info()
# non-Null Count if all rows showing [ 4 non-null ] then it means all rows entries null value not having.
# not-Null means data frame is good and not empty.

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, first to fourth
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  4 non-null      int64 
 1   Train       4 non-null      int64 
 2   Speed       4 non-null      int64 
 3   City        4 non-null      object
dtypes: int64(3), object(1)
memory usage: 160.0+ bytes


### ----------Series & DataFrames------------
### > Series are just one column. and one column you did'nt use. 
### > You use Tabular Data, Two-dimensional, size-mutable, potentially heterogeneous tabular data.



In [27]:
checkSeries = pd.Series(np.random.rand(5))

In [28]:
checkSeries

0    0.867314
1    0.247489
2    0.047794
3    0.433207
4    0.103632
dtype: float64

In [29]:
type(checkSeries)

pandas.core.series.Series

In [30]:
newDataFrame = pd.DataFrame([[6,7,9,2],[10,50,60,90]])

In [31]:
newDataFrame

Unnamed: 0,0,1,2,3
0,6,7,9,2
1,10,50,60,90


In [32]:
type(newDataFrame)

pandas.core.frame.DataFrame

In [33]:
newDataFrame.describe()

Unnamed: 0,0,1,2,3
count,2.0,2.0,2.0,2.0
mean,8.0,28.5,34.5,46.0
std,2.828427,30.405592,36.062446,62.225397
min,6.0,7.0,9.0,2.0
25%,7.0,17.75,21.75,24.0
50%,8.0,28.5,34.5,46.0
75%,9.0,39.25,47.25,68.0
max,10.0,50.0,60.0,90.0


In [34]:
newDataFrame.dtypes

0    int64
1    int64
2    int64
3    int64
dtype: object

In [35]:
newDataFrame.head()

Unnamed: 0,0,1,2,3
0,6,7,9,2
1,10,50,60,90


In [36]:
newDataFrame[0][0] = "Dipesh"

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  newDataFrame[0][0] = "Dipesh"
  newDataFrame[0][0] = "Dipesh"


In [37]:
newDataFrame

Unnamed: 0,0,1,2,3
0,Dipesh,7,9,2
1,10,50,60,90


In [38]:
newDataFrame.dtypes

0    object
1     int64
2     int64
3     int64
dtype: object

In [39]:
newDataFrame.index # gives you  all rows

RangeIndex(start=0, stop=2, step=1)

In [40]:
newDataFrame.columns # gives you all columns

RangeIndex(start=0, stop=4, step=1)

In [41]:
newDataFrame.to_numpy() #changes to numpy array

array([['Dipesh', 7, 9, 2],
       [10, 50, 60, 90]], dtype=object)

In [42]:
newDataFrame[0][0] = 0.333333333

In [43]:
newDataFrame

Unnamed: 0,0,1,2,3
0,0.333333,7,9,2
1,10.0,50,60,90


In [44]:
newDataFrame.dtypes

0    object
1     int64
2     int64
3     int64
dtype: object

In [45]:
newDataFrame.T # Transpose

Unnamed: 0,0,1
0,0.333333,10
1,7.0,50
2,9.0,60
3,2.0,90


In [46]:
newDataFrame.head()

Unnamed: 0,0,1,2,3
0,0.333333,7,9,2
1,10.0,50,60,90


In [47]:
newDFcopy = newDataFrame.copy() #it is a copy

In [48]:
newDFcopy[0][1] = 'Gauri'

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  newDFcopy[0][1] = 'Gauri'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  newDFcopy[0][1] = 'Gauri'


In [49]:
newDFcopy.head(2)

Unnamed: 0,0,1,2,3
0,0.333333,7,9,2
1,Gauri,50,60,90


In [50]:
newDataFrame.head(2)

Unnamed: 0,0,1,2,3
0,0.333333,7,9,2
1,10.0,50,60,90


#### newDataFrame & newDFcopy both are different, have diff memory

In [51]:
newDataFrame.columns = list("ABCDEF") # change of columns names

ValueError: Length mismatch: Expected axis has 4 elements, new values have 6 elements

In [None]:
newDataFrame.head(2)

In [None]:
newDataFrame.loc[[1,3],['B','D','F']] # does'nt change just return a new df and you can select index Row & colums names.

In [None]:
newDataFrame.head(2)

In [None]:
newDataFrame.loc[[1,5] ,:] # for All Rows & Columns use Colon

In [None]:
newDataFrame.loc[(newDataFrame['A'] < 0.2) | (newDataFrame['C'] > 0.1)]

In [None]:
newDataFrame.iloc[0,5] # for index selection

In [None]:
newDataFrame.iloc[[0,6],[1,4]]

In [None]:
newDataFrame.drop(['A','C'], axis=1) # delete row copy not effect on original

In [None]:
nd = newDataFrame.drop(['F'],axis=1) # for copy delete row

In [None]:
nd

In [None]:
newDataFrame.drop(['F','D'],axis=1, inplace =True) # inplace for originally Deletion 

In [None]:
newDataFrame

In [None]:
newDataFrame.reset_index(drop=True)