In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a Series of float scores, labelled by student name.
scores = pd.Series(
    data=[8.5, 7.0, 9.0],
    index=['An', 'Binh', 'Lan'],
)
print(scores)

An      8.5
Binh    7.0
Lan     9.0
dtype: float64


In [None]:
# Access Series elements by label, by position, and by label slice.
# Relies on `scores` created in the earlier "Create Series" cell.
print(scores['An'])  # label lookup -> 8.5
# Positional access must go through .iloc: a bare scores[0] on a
# string-labelled index is deprecated (FutureWarning in pandas 2.x) and is
# treated as a label lookup in newer pandas, where it raises KeyError.
print(scores.iloc[0])  # first element by position -> 8.5
print(scores['Binh':'Lan'])  # label slice, both endpoints included:
                             # Binh    7.0
                             # Lan     9.0

In [None]:
# Build a DataFrame from a dict of equal-length columns (one key per column).
data = {
    'name': ['An', 'Binh', 'Lan'],
    'age': [20, 22, 21],
    'score': [8.5, 7.0, 9.0],
}
df = pd.DataFrame(data=data)
print(df)

In [None]:
# Keep only the rows whose score is strictly greater than 8
# (boolean-mask selection on the `df` built in an earlier cell).
high_score = df.loc[df['score'].gt(8)]
print(high_score)

In [None]:
# Add a boolean 'pass' column: True where the score is at least 7.
# This mutates the `df` built in an earlier cell.
df['pass'] = df['score'].ge(7)
print(df)

In [None]:
# Summary statistic: arithmetic mean of the 'score' column of `df`.
print(df.loc[:, 'score'].mean())  # 8.166...

In [None]:
# Order the rows from highest to lowest score; `df` itself is left untouched
# because sort_values returns a new frame.
sorted_df = df.sort_values(by='score', ascending=False)
print(sorted_df)

In [None]:
# Drop incomplete records: any row containing a missing value is removed.
# Note that `data` and `df` are rebound here, replacing the earlier frame.
data = {
    'name': ['An', 'Binh', None],
    'score': [8.5, None, 9.0],
}
df = pd.DataFrame(data).dropna()
print(df)

In [None]:
# Self-contained example: build a labelled Series, then read one value by
# label and a label-based slice (the slice includes both endpoints).
import pandas as pd
scores = pd.Series(
    data=[8.5, 7.0, 9.0],
    index=['An', 'Binh', 'Lan'],
)
print(f"Scores:\n{scores}")
print(f"An's score: {scores.loc['An']}")
print(f"Scores from Binh to Lan:\n{scores.loc['Binh':'Lan']}")

In [None]:
# Self-contained example: build a DataFrame, then show one column (by label)
# and the first row (by position).
import pandas as pd
data = {
    'name': ['An', 'Binh', 'Lan'],
    'age': [20, 22, 21],
    'score': [8.5, 7.0, 9.0],
}
df = pd.DataFrame(data=data)
print(f"DataFrame:\n{df}")
print(f"\nName column:\n{df.loc[:, 'name']}")
print(f"\nFirst row:\n{df.iloc[0]}")

In [None]:
# Impute missing values instead of dropping rows: a missing score becomes the
# mean of the known scores, a missing name becomes the placeholder 'Unknown'.
import pandas as pd
data = {
    'name': ['An', 'Binh', None],
    'score': [8.5, None, 9.0],
}
df = pd.DataFrame(data)
# One fillna call with a per-column mapping; the mean is computed from the
# frame before any filling takes place.
df = df.fillna({'score': df['score'].mean(), 'name': 'Unknown'})
print(f"DataFrame after filling NaN:\n{df}")

In [None]:
# Build a Series straight from a mapping (keys become the index labels),
# then report its mean.
import pandas as pd
scores = pd.Series(dict(An=8.5, Binh=7.0, Lan=9.0))
print(f"Scores:\n{scores}")
print(f"Average score: {scores.mean()}")

In [None]:
# Self-contained example: create a three-column DataFrame and display the
# whole frame followed by its 'name' column.
import pandas as pd
data = dict(
    name=['An', 'Binh', 'Lan'],
    age=[20, 22, 21],
    score=[8.5, 7.0, 9.0],
)
df = pd.DataFrame(data)
print(f"DataFrame:\n{df}")
print(f"\nName column:\n{df.loc[:, 'name']}")

In [None]:
# Select the students whose score meets the pass mark (7 or higher).
import pandas as pd
data = {
    'name': ['An', 'Binh', 'Lan', 'Cuong'],
    'score': [8.5, 6.5, 9.0, 7.5],
}
df = pd.DataFrame(data)
passed = df.loc[df['score'] >= 7]
print(f"Passing students:\n{passed}")

In [None]:
# Select the students whose score is at least 7.
# NOTE(review): this cell repeats the preceding "Filter passing students"
# cell verbatim; consider removing one of the two.
import pandas as pd
data = dict(
    name=['An', 'Binh', 'Lan', 'Cuong'],
    score=[8.5, 6.5, 9.0, 7.5],
)
df = pd.DataFrame(data=data)
passed = df[df['score'].ge(7)]
print(f"Passing students:\n{passed}")

In [None]:
# Rank the students from highest to lowest score. The result is a new frame
# that keeps the original row labels.
import pandas as pd
data = {
    'name': ['An', 'Binh', 'Lan', 'Cuong'],
    'score': [8.5, 6.5, 9.0, 7.5],
}
df = pd.DataFrame(data)
sorted_df = df.sort_values(by='score', ascending=False)
print(f"DataFrame sorted by score:\n{sorted_df}")

In [None]:
# Impute rather than drop: the missing score is replaced by the mean of the
# known scores and the missing name by the placeholder 'Unknown'.
import pandas as pd
data = {
    'name': ['An', 'Binh', None, 'Lan'],
    'score': [8.5, None, 7.5, 9.0],
}
df = pd.DataFrame(data)
# Both replacement columns are computed from the unfilled frame, then swapped
# in together; column order is unchanged.
df = df.assign(
    score=df['score'].fillna(df['score'].mean()),
    name=df['name'].fillna('Unknown'),
)
print(f"DataFrame after handling NaN:\n{df}")

In [None]:
# Read students.csv, compute the mean of its 'score' column, and write the
# rows scoring strictly above that mean to above_average.csv (without the
# index column). A missing input file or a missing 'score' column is reported
# with a message instead of a traceback.
import pandas as pd
try:
    df = pd.read_csv('students.csv')
    mean_score = df['score'].mean()
    above_average = df.loc[df['score'].gt(mean_score)]
    # The output file is written before anything is printed.
    above_average.to_csv('above_average.csv', index=False)
    print(f"Average score: {mean_score}")
    print(f"Students above average:\n{above_average}")
except KeyError:
    # Raised by df['score'] when the CSV has no such column.
    print("Error: File does not contain 'score' column!")
except FileNotFoundError:
    # Raised by read_csv when students.csv does not exist.
    print("Error: File not found!")

In [None]:
# Standardize the scores (z-score): subtract the column mean, then divide by
# the column standard deviation. Series.std() uses pandas' default, i.e. the
# sample standard deviation (ddof=1).
import pandas as pd
import numpy as np
data = {
    'name': ['An', 'Binh', 'Lan', 'Cuong'],
    'score': [8.5, 6.5, 9.0, 7.5],
}
df = pd.DataFrame(data)
df['normalized_score'] = df['score'].sub(df['score'].mean()).div(df['score'].std())
print(f"DataFrame with normalized scores:\n{df}")