In [1]:
import pandas as pd
import numpy as np
import datetime

### Data Frame using a list

In [2]:
# Build a single-column DataFrame from a flat list of names.
lst = [
    'Rishab',
    'Sid',
    'Piyush',
]
df = pd.DataFrame(data=lst)
df

Unnamed: 0,0
0,Rishab
1,Sid
2,Piyush


### Data Frame using a Dictionary

In [3]:
# Each key becomes a column label; each list supplies that column's values.
data = dict(
    Name=['Rishab', 'Sid', 'Piyush'],
    Age=[16, 30, 20],
)

df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age
0,Rishab,16
1,Sid,30
2,Piyush,20


In [4]:
# Describe each person as one record, then pivot the records into the
# column-oriented dictionary that the DataFrame constructor receives.
profile_fields = ["Name", "Age", "Skills", "Qualification", "City"]
profiles = [
    ("John Doe", 30, ["Python", "JavaScript"],
     "Bachelor's Degree in Computer Science", "New York"),
    ("Jane Smith", 25, ["Java", "C++"],
     "Master's Degree in Software Engineering", "London"),
    ("David Johnson", 35, ["SQL", "Data Analysis"],
     "Bachelor's Degree in Statistics", "San Francisco"),
]
data = {
    field: list(values)
    for field, values in zip(profile_fields, zip(*profiles))
}
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,Skills,Qualification,City
0,John Doe,30,"[Python, JavaScript]",Bachelor's Degree in Computer Science,New York
1,Jane Smith,25,"[Java, C++]",Master's Degree in Software Engineering,London
2,David Johnson,35,"[SQL, Data Analysis]",Bachelor's Degree in Statistics,San Francisco


### Working with rows and columns

In [5]:
# Keep every row but only the two listed columns (label-based selection).
df.loc[:, ['Name', 'Qualification']]

Unnamed: 0,Name,Qualification
0,John Doe,Bachelor's Degree in Computer Science
1,Jane Smith,Master's Degree in Software Engineering
2,David Johnson,Bachelor's Degree in Statistics


In [6]:
# Load the NBA roster from 'nba.csv' into a DataFrame.
df = pd.read_csv(filepath_or_buffer='nba.csv')
df

Unnamed: 0,Name,Team,Number,Position,Age,Height,Weight,College,Salary
0,Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
1,Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
2,John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
3,R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
4,Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...,...
453,Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
454,Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
455,Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
456,Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [7]:
# Convert the frame into a nested mapping: column label -> {index label -> value}.
# 'dict' is the default orientation; it is spelled out here for clarity.
df_dict = df.to_dict(orient='dict')
df_dict

{'Name': {0: 'Avery Bradley',
  1: 'Jae Crowder',
  2: 'John Holland',
  3: 'R.J. Hunter',
  4: 'Jonas Jerebko',
  5: 'Amir Johnson',
  6: 'Jordan Mickey',
  7: 'Kelly Olynyk',
  8: 'Terry Rozier',
  9: 'Marcus Smart',
  10: 'Jared Sullinger',
  11: 'Isaiah Thomas',
  12: 'Evan Turner',
  13: 'James Young',
  14: 'Tyler Zeller',
  15: 'Bojan Bogdanovic',
  16: 'Markel Brown',
  17: 'Wayne Ellington',
  18: 'Rondae Hollis-Jefferson',
  19: 'Jarrett Jack',
  20: 'Sergey Karasev',
  21: 'Sean Kilpatrick',
  22: 'Shane Larkin',
  23: 'Brook Lopez',
  24: 'Chris McCullough',
  25: 'Willie Reed',
  26: 'Thomas Robinson',
  27: 'Henry Sims',
  28: 'Donald Sloan',
  29: 'Thaddeus Young',
  30: 'Arron Afflalo',
  31: 'Lou Amundson',
  32: 'Thanasis Antetokounmpo',
  33: 'Carmelo Anthony',
  34: 'Jose Calderon',
  35: 'Cleanthony Early',
  36: 'Langston Galloway',
  37: 'Jerian Grant',
  38: 'Robin Lopez',
  39: "Kyle O'Quinn",
  40: 'Kristaps Porzingis',
  41: 'Kevin Seraphin',
  42: 'Lance Tho

In [8]:
# Data type of each column in the frame.
df.dtypes

Name         object
Team         object
Number      float64
Position     object
Age         float64
Height       object
Weight      float64
College      object
Salary      float64
dtype: object

In [9]:
# Underlying data as a NumPy array (no row or column labels).
# pandas recommends DataFrame.to_numpy() over the older .values attribute.
df.to_numpy()

array([['Avery Bradley', 'Boston Celtics', 0.0, ..., 180.0, 'Texas',
        7730337.0],
       ['Jae Crowder', 'Boston Celtics', 99.0, ..., 235.0, 'Marquette',
        6796117.0],
       ['John Holland', 'Boston Celtics', 30.0, ..., 205.0,
        'Boston University', nan],
       ...,
       ['Tibor Pleiss', 'Utah Jazz', 21.0, ..., 256.0, nan, 2900000.0],
       ['Jeff Withey', 'Utah Jazz', 24.0, ..., 231.0, 'Kansas', 947276.0],
       [nan, nan, nan, ..., nan, nan, nan]], dtype=object)

In [10]:
# Column labels of the frame.
df.columns

Index(['Name', 'Team', 'Number', 'Position', 'Age', 'Height', 'Weight',
       'College', 'Salary'],
      dtype='object')

In [11]:
# Dimensions of the frame as a tuple.
df.shape
# (rows, columns)

(458, 9)

In [12]:
# Row labels of the frame.
df.index

RangeIndex(start=0, stop=458, step=1)

In [13]:
# Element-wise addition of the frame with itself: numeric columns are doubled
# and string columns are concatenated with themselves; the shape is unchanged.
# The result gets its own name so the loaded `df` is not overwritten and
# re-running this cell does not keep compounding the values.
df_doubled = df + df
df_doubled.shape

(458, 9)

In [14]:
from numpy import random

# Seed NumPy's global generator so the draws below repeat on every run.
np.random.seed(10)

# The arguments are evaluated left to right, so the random draws happen in the
# order listed; each value is printed on its own line.
print(
    np.random.randint(5),
    np.random.rand(5),
    np.random.normal(),
    np.arange(20, 50),
    sep="\n",
)

1
[0.29876115 0.49458993 0.44301495 0.83191136 0.58332174]
-0.9056104817781895
[20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
 44 45 46 47 48 49]


In [15]:
# Wrap the integers 20..29 in a one-column DataFrame.
pd.DataFrame(data=np.arange(20, 30, 1))

Unnamed: 0,0
0,20
1,21
2,22
3,23
4,24
5,25
6,26
7,27
8,28
9,29


In [28]:
# Load 'nba.csv' again, this time using the "Name" column as the row index.
df = pd.read_csv(
    filepath_or_buffer='nba.csv',
    index_col="Name",
)
df

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0
...,...,...,...,...,...,...,...,...
Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0


In [29]:
# First 5 rows of the DataFrame (n=5 is also the default).
df.head(n=5)

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [30]:
# Final 5 rows of the DataFrame (n=5 is also the default).
df.tail(n=5)

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Shelvin Mack,Utah Jazz,8.0,PG,26.0,6-3,203.0,Butler,2433333.0
Raul Neto,Utah Jazz,25.0,PG,24.0,6-1,179.0,,900000.0
Tibor Pleiss,Utah Jazz,21.0,C,26.0,7-3,256.0,,2900000.0
Jeff Withey,Utah Jazz,24.0,C,26.0,7-0,231.0,Kansas,947276.0
,,,,,,,,


In [31]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Number,Age,Weight,Salary
count,457.0,457.0,457.0,446.0
mean,17.678337,26.938731,221.522976,4842684.0
std,15.96609,4.404016,26.368343,5229238.0
min,0.0,19.0,161.0,30888.0
25%,5.0,24.0,200.0,1044792.0
50%,13.0,26.0,220.0,2839073.0
75%,25.0,30.0,240.0,6500000.0
max,99.0,40.0,307.0,25000000.0


# df_dict = df.to_dict()
df_dict

In [17]:

# Start from a three-row frame, then append a fourth row by assigning to the
# next unused integer label.
data = {
    "name": ["Alice", "Bob", "Claire"],
    "Age": [25, 30, 22],
    "city": ["New York", "Los Angeles", "Chicago"],
}
df = pd.DataFrame(data)

next_label = len(df.index)
df.loc[next_label] = ["David", 28, "Houston"]
print(df)

     name  Age         city
0   Alice   25     New York
1     Bob   30  Los Angeles
2  Claire   22      Chicago
3   David   28      Houston


In [18]:
# Creating Objects using DataFrame: 

# Create a DataFrame named "nba_df" from the "nba.csv" dataset. 

# Display the first 5 rows of the DataFrame "nba_df". 

 
    

In [26]:
# Load the roster with player names as the row index.
nba_df = pd.read_csv('nba.csv', index_col="Name")

# Earlier experiments, kept here only as notes (not executed):
#   newnba_df = nba_df.loc[:, ["Age", "Weight"]]
#   newnba_df = nba_df.iloc[:, [4, 6]]

# Passing lists for both the row and the column selector makes .loc return a
# DataFrame rather than a scalar.
rows = nba_df.loc[["John Holland"], ["Age"]]
rows
    


Unnamed: 0_level_0,Age
Name,Unnamed: 1_level_1
John Holland,27.0


In [None]:
# Create a dictionary containing the information of a new NBA player:
# {'Name': 'LeBron James', 'Team': 'Los Angeles Lakers', 'Number': 23.0,
#  'Position': 'SF', 'Age': 36.0, 'Height': '6-9', 'Weight': 250.0,
#  'College': 'St. Vincent-St. Mary', 'Salary': 39219565.0}

data = {
    'Name': 'LeBron James',
    'Team': 'Los Angeles Lakers',
    'Number': 23.0,
    'Position': 'SF',
    'Age': 36.0,
    'Height': '6-9',
    'Weight': 250.0,
    'College': 'St. Vincent-St. Mary',
    'Salary': 39219565.0,
}

# nba_df is indexed by player name, so the new row is labelled with the
# player's name (not an integer taken from the unrelated `df`), and the values
# are laid out in nba_df's own column order. Assigning by name also makes the
# cell safe to re-run: it overwrites the same row instead of adding another.
nba_df.loc[data['Name']] = [data[column] for column in nba_df.columns]
nba_df

In [None]:
# Given the following dictionary, build a DataFrame with one column per key:
data = {
    'Name': ['John', 'Emily', 'Michael'],
    'Age': [23, 29, 35],
    'City': ['Boston', 'Seattle', 'San Francisco'],
}
df = pd.DataFrame(data=data)
df


In [None]:
# Full player record; the row appended below is written out by hand and does
# not read from this dictionary.
basketballdata = {
    'Name': 'LeBron James',
    'Team': 'Los Angeles Lakers',
    'Number': 23.0,
    'Position': 'SF',
    'Age': 36.0,
    'Height': '6-9',
    'Weight': 250.0,
    'College': 'St. Vincent-St. Mary',
    'Salary': 39219565.0,
}

# Append one row at the next integer label (the current row count).
df.loc[df.shape[0]] = ["Lebron James", 36, "Los Angeles"]
df