[Reference](https://towardsdatascience.com/5-lesser-known-pandas-tricks-e8ab1dd21431)

# 1. Date Ranges

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Inclusive ISO-formatted bounds of the demo window.
date_from, date_to = "2019-01-01", "2019-01-12"

# One timestamp per calendar day between the two bounds (both included).
date_range = pd.date_range(start=date_from, end=date_to, freq="D")
date_range

DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
               '2019-01-05', '2019-01-06', '2019-01-07', '2019-01-08',
               '2019-01-09', '2019-01-10', '2019-01-11', '2019-01-12'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# Pair every day with the following one; zip stops at the shorter sequence,
# so the last day only ever appears as an end date. Numbering starts at 1.
for i, (date_from, date_to) in enumerate(zip(date_range, date_range[1:]), start=1):
    # Reduce both timestamps to plain YYYY-MM-DD strings before printing.
    date_from, date_to = date_from.date().isoformat(), date_to.date().isoformat()
    print("%d. date_from: %s, date_to: %s" % (i, date_from, date_to))

1. date_from: 2019-01-01, date_to: 2019-01-02
2. date_from: 2019-01-02, date_to: 2019-01-03
3. date_from: 2019-01-03, date_to: 2019-01-04
4. date_from: 2019-01-04, date_to: 2019-01-05
5. date_from: 2019-01-05, date_to: 2019-01-06
6. date_from: 2019-01-06, date_to: 2019-01-07
7. date_from: 2019-01-07, date_to: 2019-01-08
8. date_from: 2019-01-08, date_to: 2019-01-09
9. date_from: 2019-01-09, date_to: 2019-01-10
10. date_from: 2019-01-10, date_to: 2019-01-11
11. date_from: 2019-01-11, date_to: 2019-01-12


# 2. Merge with indicator

In [5]:
# Left-hand table of the merge demo: keys key1..key4 with their values.
left = pd.DataFrame(
    {
        "key": ["key1", "key2", "key3", "key4"],
        "value_l": [1, 2, 3, 4],
    }
)

In [6]:
# Right-hand table of the merge demo; it has key6 but no key4.
right = pd.DataFrame(
    {
        "key": ["key3", "key2", "key1", "key6"],
        "value_r": [3, 2, 1, 6],
    }
)

In [7]:
# Left join on "key"; indicator=True adds a "_merge" column recording
# whether each row matched in both frames or only in the left one.
df_merge = pd.merge(left, right, how="left", on="key", indicator=True)

In [8]:
# Tally how many rows fall into each merge category.
df_merge["_merge"].value_counts()

both          3
left_only     1
right_only    0
Name: _merge, dtype: int64

# 3. Nearest merge

In [9]:
# Quote feed: one row per (timestamp, ticker) with bid/ask prices.
# Timestamps start as strings and are parsed to datetimes in the same chain.
quotes = pd.DataFrame(
    data=[
        ("2016-05-25 13:30:00.023", "GOOG", 720.50, 720.93),
        ("2016-05-25 13:30:00.023", "MSFT", 51.95, 51.96),
        ("2016-05-25 13:30:00.030", "MSFT", 51.97, 51.98),
        ("2016-05-25 13:30:00.041", "MSFT", 51.99, 52.00),
        ("2016-05-25 13:30:00.048", "GOOG", 720.50, 720.93),
        ("2016-05-25 13:30:00.049", "AAPL", 97.99, 98.01),
        ("2016-05-25 13:30:00.072", "GOOG", 720.50, 720.88),
        ("2016-05-25 13:30:00.075", "MSFT", 52.01, 52.03),
    ],
    columns=["timestamp", "ticker", "bid", "ask"],
).assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))

In [10]:
# Executed trades: one row per fill with price and quantity.
# Timestamps start as strings and are parsed to datetimes in the same chain.
trades = pd.DataFrame(
    data=[
        ("2016-05-25 13:30:00.023", "MSFT", 51.95, 75),
        ("2016-05-25 13:30:00.038", "MSFT", 51.95, 155),
        ("2016-05-25 13:30:00.048", "GOOG", 720.77, 100),
        ("2016-05-25 13:30:00.048", "GOOG", 720.92, 100),
        ("2016-05-25 13:30:00.048", "AAPL", 98.00, 100),
    ],
    columns=["timestamp", "ticker", "price", "quantity"],
).assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))

In [11]:
# For each trade, attach the most recent quote of the same ticker that is
# at or before the trade time (direction="backward") and no more than 10 ms
# older; trades with no such quote keep missing bid/ask values.
pd.merge_asof(
    left=trades,
    right=quotes,
    on="timestamp",
    by="ticker",
    tolerance=pd.Timedelta("10ms"),
    direction="backward",
)

Unnamed: 0,timestamp,ticker,price,quantity,bid,ask
0,2016-05-25 13:30:00.023,MSFT,51.95,75,51.95,51.96
1,2016-05-25 13:30:00.038,MSFT,51.95,155,51.97,51.98
2,2016-05-25 13:30:00.048,GOOG,720.77,100,720.5,720.93
3,2016-05-25 13:30:00.048,GOOG,720.92,100,720.5,720.93
4,2016-05-25 13:30:00.048,AAPL,98.0,100,,


# 4. Create an Excel report

In [12]:
# Small 3x3 integer frame used as the payload of the Excel report.
# Built from nested lists rather than through `pd.np`: that numpy alias was
# deprecated in pandas 1.0 and removed in pandas 2.0.
df = pd.DataFrame(
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    columns=["a", "b", "c"],
)

  """Entry point for launching an IPython kernel.


In [16]:
!pip install xlsxwriter
import xlsxwriter
report_name = 'example_report.xlsx'
sheet_name = 'Sheet1'
writer = pd.ExcelWriter(report_name, engine='xlsxwriter')
df.to_excel(writer, sheet_name=sheet_name, index=False)
# writer.save()

Collecting xlsxwriter
[?25l  Downloading https://files.pythonhosted.org/packages/5f/ae/bc537251d5a191053467d0a656a6920d70f735e7d829e1fff8bcab9ddcc8/XlsxWriter-1.3.1-py2.py3-none-any.whl (142kB)
[K     |██▎                             | 10kB 13.6MB/s eta 0:00:01[K     |████▋                           | 20kB 2.2MB/s eta 0:00:01[K     |███████                         | 30kB 2.8MB/s eta 0:00:01[K     |█████████▏                      | 40kB 3.1MB/s eta 0:00:01[K     |███████████▌                    | 51kB 2.4MB/s eta 0:00:01[K     |█████████████▉                  | 61kB 2.7MB/s eta 0:00:01[K     |████████████████▏               | 71kB 3.1MB/s eta 0:00:01[K     |██████████████████▍             | 81kB 3.3MB/s eta 0:00:01[K     |████████████████████▊           | 92kB 3.4MB/s eta 0:00:01[K     |███████████████████████         | 102kB 3.3MB/s eta 0:00:01[K     |█████████████████████████▍      | 112kB 3.3MB/s eta 0:00:01[K     |███████████████████████████▋    | 122kB 3.3M

In [17]:
# Grab the underlying xlsxwriter workbook and the sheet the frame was
# written to, so a chart can be attached to it.
workbook = writer.book
worksheet = writer.sheets[sheet_name]

# create a chart line object
chart = workbook.add_chart({'type': 'line'})

# configure the series of the chart from the spreadsheet
# using a list of values instead of category/value formulas:
#     [sheetname, first_row, first_col, last_row, last_col]
# Rows and columns are zero-based: rows 1-3 of column 0 supply the
# categories and rows 1-3 of column 1 supply the values.
chart.add_series({
    'categories': [sheet_name, 1, 0, 3, 0],
    'values':     [sheet_name, 1, 1, 3, 1],
})

# configure the chart axes
chart.set_x_axis({'name': 'Index', 'position_axis': 'on_tick'})
chart.set_y_axis({'name': 'Value', 'major_gridlines': {'visible': False}})

# place the chart on the worksheet
worksheet.insert_chart('E2', chart)

# output the excel file
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0;
# close() writes the workbook to disk and is available on old and new
# pandas versions alike.
writer.close()

# 5. Save disk space

In [18]:
# 50,000 x 300 frame of standard-normal floats, used as a large payload for
# the file-size comparison in this section.
# Uses numpy directly: the `pd.np` alias was deprecated in pandas 1.0 and
# removed in pandas 2.0. The generator is seeded so a full re-run of the
# notebook produces the same data.
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.standard_normal((50000, 300)))

  """Entry point for launching an IPython kernel.


In [20]:
# Baseline export: the frame as an uncompressed CSV without the index.
# The call is left disabled; uncomment it to write random_data.csv.
# df.to_csv('random_data.csv', index=False)

In [19]:
# Same export, but gzip-compressed on write.
# The call is left disabled; uncomment it to write random_data.gz.
# df.to_csv('random_data.gz', compression='gzip', index=False)

In [21]:
# Read the gzip-compressed file back into a frame (call left disabled).
# Note that no compression argument is passed to read_csv here.
# df = pd.read_csv('random_data.gz')