In [1]:
import random
from itertools import permutations

import numpy as np
import pandas as pd

In [2]:
# Month names in Persian script and in Latin transliteration.
# The two lists are index-aligned: position 0 is the first month in both.
f_months = ["فروردین","اردیبهشت","خرداد", "تیر", "مرداد","شهریور","مهر","آبان","آذر","دی","بهمن","اسفند"]
e_months = [
    "Farvardin", "Ordibehesht", "Khordad","Tir", "Mordad", "Shahrivar",
      "Mehr", "Aban", "Azar", "Dey","Bahman","Esfand"
]

# Persian ordinal words for the days of the month, index-aligned with `days`
# below. `days` runs 1..31, so this list needs 31 entries; with only 30,
# zip() silently drops day 31 and ed2fd.get("31") returns None.
f_days =[
    "اول", "دوم","سوم", "چهارم", "پنجم","ششم","هفتم","هشتم","نهم","دهم",
    "یازدهم","دوازدهم","سیزدهم","چهاردهم","پانزدهم","شانزدهم","هفدهم","هجدهم",
    "نوزدهم","بیستم","بیست و یکم","بیست و دوم","بیست و سوم","بیست و چهارم","بیست و پنجم",
    "بیست و ششم","بیست و هفتم","بیست و هشتم","بیست و نهم","سی ام","سی و یکم"
]

# Years 1400..1410 written out in Persian words, index-aligned with `years`.
f_years = [
    "هزار و چهارصد","هزار و چهارصد و یک","هزار و چهارصد و دو","هزار و چهارصد و سه",
    "هزار و چهارصد و چهار","هزار و چهارصد و پنج", "هزار و چهارصد و شش", "هزار و چهارصد و هفت",
    "هزار و چهارصد و هشت","هزار و چهارصد و نه","هزار و چهارصد و ده"
]

# Same year words, each prefixed with the word "سال ".
f_years_with_yearname = ["سال " + year_words for year_words in f_years]

days = list(range(1, 32))
years = list(range(1400, 1411))

# The lookup tables below are built with zip(), which truncates to the shorter
# input without complaint, so fail fast if any pair of lists drifts apart.
assert len(f_months) == len(e_months) == 12
assert len(f_days) == len(days)
assert len(f_years) == len(years)

# Latin month name -> month number (1-based).
em2int = {name: number for number, name in enumerate(e_months, start=1)}

# Latin month name -> Persian month name.
em2fm = dict(zip(e_months, f_months))
# Day number as a string ("1".."31") -> Persian ordinal word.
ed2fd = {str(day): word for day, word in zip(days, f_days)}

# Numeric year <-> worded year (with the "سال " prefix), in both directions.
ey2fy = dict(zip(years, f_years_with_yearname))
fy2ey = dict(zip(f_years_with_yearname, years))

### Create the date-text dataset (train/test split)

In [4]:
# Accumulators for the generated dataset: X* collect samples, Y* collect
# their labels; "tr" is the training split and "ts" the test split.
Xtr = []
Ytr = []
Xts = []
Yts = []

# How many orderings of each date spelling are held out for the test split.
n_test_per_perms = 1

def get_permutes(input_list):
    """Return every distinct ordering of ``input_list`` as a space-joined string.

    Each element is converted with ``str()`` before joining, so mixed
    int/str inputs are accepted. Duplicate orderings (possible when the
    input contains equal elements) are collapsed by going through a set.

    The order of the returned list is the set's iteration order and must
    not be relied on: when the elements include strings it can differ
    between interpreter sessions, because Python randomizes string hashes
    per process by default.
    """
    rendered = []
    for ordering in set(permutations(input_list)):
        rendered.append(" ".join(map(str, ordering)))
    return rendered

# Seeded RNG that decides which orderings of each spelling are held out.
# get_permutes() returns its strings in set-iteration order, which is not
# stable between kernel sessions, so the orderings are sorted first and then
# shuffled reproducibly; a fresh run therefore rebuilds the same split.
SPLIT_SEED = 0
split_rng = random.Random(SPLIT_SEED)

for d in days:
    for m in e_months:
        for y in years:
            # Target for every spelling of this date: (year, month number, day).
            label = (y, em2int.get(m), d)

            fa_month = em2fm.get(m)
            # NOTE(review): .get() returns None for a day that has no ordinal
            # word, and get_permutes() would render that as the text "None" -
            # confirm ed2fd covers every value in `days`.
            fa_day = ed2fd.get(str(d))
            fa_year = ey2fy.get(y)

            # Four spellings of the same date; the month is always in Persian.
            variants = (
                [d, fa_month, y],             # numeric day, numeric year
                [fa_day, fa_month, y],        # ordinal-word day, numeric year
                [fa_day, fa_month, fa_year],  # ordinal-word day, worded year
                [d, fa_month, fa_year],       # numeric day, worded year
            )

            for variant in variants:
                perms = sorted(get_permutes(variant))  # 3! = 6 orderings
                split_rng.shuffle(perms)

                # Split on an explicit index: slicing with -n_test_per_perms
                # would put every ordering in the test split when it is 0.
                n_train = max(len(perms) - n_test_per_perms, 0)
                train_part, test_part = perms[:n_train], perms[n_train:]

                Xtr += train_part
                Xts += test_part
                # One label per emitted sample keeps X and Y aligned by
                # construction instead of relying on hard-coded counts.
                Ytr += [label] * len(train_part)
                Yts += [label] * len(test_part)

In [6]:
# Sample counts for display: test split first, then training split.
(len(Xts), len(Xtr))

(16368, 81840)

ما 11 سال 12 ماه و 31 روز را در نظر گرفتیم
بنابراین حالات متفاوتی که میتونن داشته باشن در کنار همدیگه بدون تغییر جایگشت بصورت زیره

11 * 12 * 31 = 4092

و از اونجا که هر کدوم از این حالات میتونه 3 فاکتوریل جایگشت متفاوت داشته باشه داریم

4092 * 3! = 4092 * 6 = 24552

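این تنها برای حالتیه که از کل تاریخ غیررسمی فقط ماهش فارسی باشه. ولی کد بالا برای هر تاریخ چهار شکل نوشتاری می‌سازه: فقط ماه فارسی، ماه و روز فارسی، ماه و روز و سال فارسی، و ماه و سال فارسی.
هر کدوم از این شکل‌ها باز همینقد حالت به وجود میاره پس در کل به تعداد زیر تاریخ غیررسمی داریم

24552 * 4 = 98208

که با خروجی بالا هم می‌خونه: 16368 + 81840 = 98208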

In [50]:
# Write the training split to disk: the input strings and their
# (year, month, day) labels go to separate workbooks in the working directory.
for split_rows, split_columns, workbook_name in (
    (Xtr, ["input_date"], "Xtr.xlsx"),
    (Ytr, ["year", "month", "day"], "Ytr.xlsx"),
):
    pd.DataFrame(split_rows, columns=split_columns).to_excel(workbook_name)

### Save the test split

In [51]:
# Write the test split to disk: the input strings and their
# (year, month, day) labels go to separate workbooks in the working directory.
for split_rows, split_columns, workbook_name in (
    (Xts, ["input_date"], "Xts.xlsx"),
    (Yts, ["year", "month", "day"], "Yts.xlsx"),
):
    pd.DataFrame(split_rows, columns=split_columns).to_excel(workbook_name)