In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the sample table, using the CSV's 'Unnamed: 0' column as the row index.
data = pd.read_csv(
    't1.csv',
    index_col='Unnamed: 0',
)
data

Unnamed: 0,name,age
0,a,21
1,b,23
2,c,34
3,d,54
4,e,24
5,f,23


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric 'age' column.
data.describe()

Unnamed: 0,age
count,6.0
mean,29.833333
std,12.703018
min,21.0
25%,23.0
50%,23.5
75%,31.5
max,54.0


In [6]:
# Print a concise summary: index type and range, each column's non-null count and dtype, memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6 entries, 0 to 5
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    6 non-null      object
 1   age     6 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 144.0+ bytes


In [9]:
# Per-column dtypes as read from the CSV: 'name' is object, 'age' is int64.
data.dtypes

name    object
age      int64
dtype: object

In [10]:
# The frame's contents as a NumPy array; mixing a string column with an integer
# column yields a single dtype=object array.
data.values

array([['a', 21],
       ['b', 23],
       ['c', 34],
       ['d', 54],
       ['e', 24],
       ['f', 23]], dtype=object)

In [11]:
# Column labels as a pandas Index ('Unnamed: 0' was consumed as the row index,
# so only 'name' and 'age' remain).
data.columns

Index(['name', 'age'], dtype='object')

In [13]:
# Copy the column labels out of the pandas Index into a plain NumPy array
# (dtype=object, since the labels are strings).
columnarray = np.array(data.columns)
columnarray

array(['name', 'age'], dtype=object)

In [14]:
# Persist the frame to a single archive, 'data1.npz', holding two named arrays:
#   data1    - the cell values
#   columns1 - the column labels
# Both arrays are dtype=object, so they must be loaded back with allow_pickle=True.
np.savez(
    'data1.npz',
    data1=data.values,
    columns1=np.array(data.columns),
)

In [16]:
# Re-open the archive just to inspect its contents; the `with` block closes the file afterwards.
# allow_pickle=True is required because both arrays were saved with dtype=object, which NumPy
# stores as pickles. Unpickling can execute arbitrary code, so only use it on files you trust.
with np.load('data1.npz',allow_pickle=True) as file:
    print(file['data1'])
    print(file['columns1'])

[['a' 21]
 ['b' 23]
 ['c' 34]
 ['d' 54]
 ['e' 24]
 ['f' 23]]
['name' 'age']


In [19]:
# Load the archive again, this time keeping the NpzFile handle in `dt` so later cells can read from it.
# allow_pickle=True is needed for the dtype=object arrays; only unpickle files you trust.
# NOTE(review): unlike the `with` form, this handle is not closed anywhere in the visible cells --
# consider calling dt.close() once the DataFrame has been rebuilt.
dt = np.load('data1.npz',allow_pickle=True)
print(type(dt))
# Names of the arrays stored in the archive.
dt.files

<class 'numpy.lib.npyio.NpzFile'>


['data1', 'columns1']

In [20]:
# Fetch the saved cell values from the archive (a 6x2 dtype=object array).
dt['data1']

array([['a', 21],
       ['b', 23],
       ['c', 34],
       ['d', 54],
       ['e', 24],
       ['f', 23]], dtype=object)

In [21]:
# Fetch the saved column labels from the archive.
dt['columns1']

array(['name', 'age'], dtype=object)

In [22]:
# Rebuild a DataFrame from the archive: the saved values become the data and
# the saved labels become the column names.
df = pd.DataFrame(
    data=dt['data1'],
    columns=dt['columns1'],
)
df

Unnamed: 0,name,age
0,a,21
1,b,23
2,c,34
3,d,54
4,e,24
5,f,23


In [24]:
# Both columns come back as object dtype because the saved array was dtype=object;
# the numeric 'age' column therefore needs an explicit conversion.
df.dtypes

name    object
age     object
dtype: object

In [28]:
# Convert 'age' from object back to an integer dtype. astype(int) uses NumPy's default
# integer type, which came out as int32 in this run (see output) -- the width is
# platform-dependent, so do not rely on it being int32 or int64.
df["age"] = df["age"].astype(int)
df.dtypes

name    object
age      int32
dtype: object

In [29]:
# Pin the integer width explicitly: astype('int64') yields the same 64-bit dtype on
# every platform and converts the whole column in one vectorized step, instead of
# calling int() on each element and relying on pandas to infer the result dtype.
df["age"] = df["age"].astype('int64')
df.dtypes

name    object
age      int64
dtype: object

In [31]:
# Inspect the row index: the frame was built from a bare array with no index given,
# so pandas assigned a default RangeIndex (0 through 5).
df.index

RangeIndex(start=0, stop=6, step=1)

In [33]:
# Confirm the rebuilt frame's structure: 6 rows, 'name' as object and 'age' as int64.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   name    6 non-null      object
 1   age     6 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 224.0+ bytes


In [34]:
# With 'age' numeric again, describe() reports the same statistics as for the original `data` frame.
df.describe()

Unnamed: 0,age
count,6.0
mean,29.833333
std,12.703018
min,21.0
25%,23.0
50%,23.5
75%,31.5
max,54.0


In [36]:
# Second example: converting a NumPy array to a pandas DataFrame.
# Start from a small 2x3 integer array and confirm what it is.
my_array = np.array([
    [11, 22, 33],
    [44, 55, 66],
])
print(my_array)
print(type(my_array))

[[11 22 33]
 [44 55 66]]
<class 'numpy.ndarray'>


In [37]:
# Wrap the array in a DataFrame, naming its three columns; rows get a default integer index.
df1 = pd.DataFrame(
    data=my_array,
    columns=['Column_A', 'Column_B', 'Column_C'],
)
print(type(df1))
df1

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,Column_A,Column_B,Column_C
0,11,22,33
1,44,55,66


In [38]:
# Same conversion, this time also supplying explicit row labels via `index`.
df1 = pd.DataFrame(
    data=my_array,
    columns=['Column_A', 'Column_B', 'Column_C'],
    index=['Item_1', 'Item_2'],
)
df1

Unnamed: 0,Column_A,Column_B,Column_C
Item_1,11,22,33
Item_2,44,55,66


In [39]:
# A NumPy array mixing strings and integers; dtype=object keeps each element as its
# original Python object.
my_array = np.array(
    [
        ['Jon', 25, 1995, 2016],
        ['Maria', 47, 1973, 2000],
        ['Bill', 38, 1982, 2005],
    ],
    dtype=object,
)
print(my_array)
print(type(my_array))
print(my_array.dtype)

[['Jon' 25 1995 2016]
 ['Maria' 47 1973 2000]
 ['Bill' 38 1982 2005]]
<class 'numpy.ndarray'>
object


In [40]:
# Turn the mixed-type array into a DataFrame with descriptive column names.
df1 = pd.DataFrame(
    data=my_array,
    columns=['Name', 'Age', 'Birth Year', 'Graduation Year'],
)
df1

Unnamed: 0,Name,Age,Birth Year,Graduation Year
0,Jon,25,1995,2016
1,Maria,47,1973,2000
2,Bill,38,1982,2005


In [41]:
# All four columns are object dtype because the source array was created with dtype=object.
df1.dtypes

Name               object
Age                object
Birth Year         object
Graduation Year    object
dtype: object

In [44]:
# Convert the three numeric columns from object to int64 in one explicit, vectorized
# astype call (rather than calling int() on every element of each column separately).
# 'Name' is not listed, so it keeps its object dtype.
df1 = df1.astype({
    'Age': 'int64',
    'Birth Year': 'int64',
    'Graduation Year': 'int64',
})
df1.dtypes

Name               object
Age                 int64
Birth Year          int64
Graduation Year     int64
dtype: object

In [45]:
# Summary statistics for the three integer columns; the object-dtype 'Name' column is left out.
df1.describe()

Unnamed: 0,Age,Birth Year,Graduation Year
count,3.0,3.0,3.0
mean,36.666667,1983.333333,2007.0
std,11.06044,11.06044,8.185353
min,25.0,1973.0,2000.0
25%,31.5,1977.5,2002.5
50%,38.0,1982.0,2005.0
75%,42.5,1988.5,2010.5
max,47.0,1995.0,2016.0
